एमपीआई आईओ रीडिंग और राइटिंग ब्लॉक साइक्लिक मैट्रिक्स

मेरे पास एचपीसी वितरित सिस्टम पर मैट्रिक्स गुणा करने के लिए एक स्कूल प्रोजेक्ट है।एमपीआई आईओ रीडिंग और राइटिंग ब्लॉक साइक्लिक मैट्रिक्स

मुझे समानांतर आईओ सिस्टम से मैट्रिक्स में पढ़ने की आवश्यकता है और कई कंप्यूट नोड्स (प्रोसेसर) पर समानांतर में मैट्रिक्स गुणा करने के लिए pblacs का उपयोग करना है। MPI IO आदेशों का उपयोग करने के लिए डेटा को पढ़ा जाना चाहिए। मुझे पता है कि पीबीएलएक्स गुणा करने के लिए ब्लॉक चक्रीय वितरण का उपयोग करता है।

प्रोफेसर हमें एमपीआई आईओ पर नहीं दिया है ज्यादा जानकारी है, और मैं मुसीबत इस पर ज्यादा जानकारी/संसाधनों की खोज कर रहा हूँ। विशेष रूप से, क्या ब्लॉक चक्रीय तरीके से समांतर आईओ सिस्टम से मैट्रिक्स में पढ़ने के तरीके हैं और आसानी से इसे pblacs pdgemm में प्लग करते हैं?

किसी भी संकेत दिए गए उपयोगी संसाधनों को बहुत सराहना की जाएगी। मैं समय पर थोड़ा सा छोटा हूं, और इस परियोजना पर दिशा की कमी के साथ निराश हो रहा हूं।

स्रोत

2012-04-26 James Cotter

यह वास्तव में अपेक्षाकृत सरल है (यदि आप पहले से ही ब्लैक/स्केलपैक और एमपीआई-आईओ के बारे में कुछ जानते हैं!) लेकिन फिर भी दस्तावेज़ीकरण - यहां तक कि ऑनलाइन - जैसा कि आपने पाया है, कुछ हद तक खराब है।

एमपीआई-आईओ के बारे में जानने वाली पहली बात यह है कि यह आपको फ़ाइल के प्रत्येक प्रक्रिया के "दृश्य" को निर्दिष्ट करने के लिए सामान्य एमपीआई डेटा प्रकारों का उपयोग करने देता है, और फिर उस दृश्य में केवल डेटा को पढ़ता है। हमारे केंद्र में हमारे पास समानांतर आईओ पर आधे दिन के पाठ्यक्रम के लिए स्लाइड और स्रोत कोड है; पहला तीसरा या तो एमपीआई-आईओ की मूल बातें है। स्लाइड here और नमूना स्रोत कोड here हैं।

पता करने के लिए दूसरी बात यह एमपीआई "वितरित सरणी" डेटा प्रकार, यदि आप एक ब्लॉक चक्रीय डेटा वितरण की रूपरेखा की सुविधा देता है एक संयोजन जिनमें से बनाने के लिए एक अंतर्निहित तरीका है कि है, इस प्रश्न के उत्तर में सामान्य शब्दों में चर्चा की गई है: What is the difference between darray and subarray in mpi?।

तो इसका मतलब है कि यदि आपके पास एक बड़ी मैट्रिक्स वाली बाइनरी फ़ाइल है, तो आप इसे MPI_Type_create_darray का उपयोग करके mpi-io के साथ पढ़ सकते हैं और इसे ब्लॉक-चक्रीय तरीके से कार्यों द्वारा वितरित किया जाएगा। तो यह सिर्फ blacs या scalapack कॉल करने का मामला है। मैट्रिक्स वेक्टर गुणा बजाय psgemm के लिए scalapack psgemv का उपयोग करने का एक उदाहरण कार्यक्रम कम्प्यूटेशनल विज्ञान ढेर एक्सचेंज में कोई question करने के लिए अपने जवाब में सूचीबद्ध है।

आपको यह विचार करने के लिए कि टुकड़े एक साथ कैसे फिट होते हैं, निम्नलिखित एक साधारण प्रोग्राम है जो एक मैट्रिक्स युक्त एक बाइनरी फ़ाइल में पढ़ता है (पहले वर्ग मैट्रिक्स एन का आकार और फिर एन^2 तत्व) और फिर स्केलपैक (नया) pssyevr दिनचर्या का उपयोग करके eigenvalues और वैक्टर की गणना करता है। यह एमपीआई-आईओ, डेरे, और स्केलपैक सामान को जोड़ती है। यह फोरट्रान में है, लेकिन फ़ंक्शन कॉल सी-आधारित भाषाओं में समान हैं।

! 
! Use MPI-IO to read a diagonal matrix distributed block-cyclically, 
! use Scalapack to calculate its eigenvalues, and compare 
! to expected results. 
! 
program darray 
     use mpi 
     implicit none 

     integer :: n, nb ! problem size and block size 
     integer :: myArows, myAcols ! size of local subset of global array 
     real :: p 
     real, dimension(:), allocatable :: myA, myZ 
     real, dimension(:), allocatable :: work 
     integer, dimension(:), allocatable :: iwork 
     real, dimension(:), allocatable :: eigenvals 
     real, dimension(:), allocatable :: expected 
     integer :: worksize, totwork, iworksize 

     integer, external :: numroc ! blacs routine 
     integer :: me, procs, icontxt, prow, pcol, myrow, mycol ! blacs data 
     integer :: info ! scalapack return value 
     integer, dimension(9) :: ides_a, ides_z ! scalapack array desc 
     integer :: clock 
     real :: calctime, iotime 

     character(len=128) :: filename 
     integer :: mpirank 
     integer :: ierr 
     integer, dimension(2) :: pdims, dims, distribs, dargs 
     integer :: infile 
     integer, dimension(MPI_STATUS_SIZE) :: mpistatus 
     integer :: darray 
     integer :: locsize, nelements 
     integer(kind=MPI_ADDRESS_KIND) :: lb, locextent 
     integer(kind=MPI_OFFSET_KIND) :: disp 
     integer :: nargs 
     integer :: m, nz 

! Initialize MPI (for MPI-IO) 

     call MPI_Init(ierr) 
     call MPI_Comm_size(MPI_COMM_WORLD,procs,ierr) 
     call MPI_Comm_rank(MPI_COMM_WORLD,mpirank,ierr) 

! May as well get the process grid from MPI_Dims_create 
     pdims = 0 
     call MPI_Dims_create(procs, 2, pdims, ierr) 
     prow = pdims(1) 
     pcol = pdims(2) 

! get filename 
     nargs = command_argument_count() 
     if (nargs /= 1) then 
      print *,'Usage: darray filename' 
      print *,'  Where filename = name of binary matrix file.' 
      call MPI_Abort(MPI_COMM_WORLD,1,ierr) 
     endif 
     call get_command_argument(1, filename) 

! find the size of the array we'll be using 

     call tick(clock) 
     call MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, infile, ierr) 
     call MPI_File_read_all(infile,n,1,MPI_INTEGER,mpistatus,ierr) 
     call MPI_File_close(infile,ierr) 

! create the darray that will read in the data. 

     nb = 64 
     if (nb > (N/prow)) nb = N/prow 
     if (nb > (N/pcol)) nb = N/pcol 
     dims = [n,n] 
     distribs = [MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC] 
     dargs = [nb, nb] 

     call MPI_Type_create_darray(procs, mpirank, 2, dims, distribs, dargs, & 
            pdims, MPI_ORDER_FORTRAN, MPI_REAL, darray, ierr) 
     call MPI_Type_commit(darray,ierr) 

     call MPI_Type_size(darray, locsize, ierr) 
     nelements = locsize/4 
     call MPI_Type_get_extent(darray, lb, locextent, ierr) 

! Initialize local arrays  

     allocate(myA(nelements)) 
     allocate(myZ(nelements)) 
     allocate(eigenvals(n), expected(n)) 

! read in the data 
     call MPI_File_open(MPI_COMM_WORLD, trim(filename), MPI_MODE_RDONLY, MPI_INFO_NULL, infile, ierr) 
     disp = 4 ! skip N = 4 bytes 
     call MPI_File_set_view(infile, disp, MPI_REAL, darray, "native", MPI_INFO_NULL, ierr) 
     call MPI_File_read_all(infile, myA, nelements, MPI_REAL, mpistatus, ierr) 
     call MPI_File_close(infile,ierr) 

     iotime = tock(clock) 
     if (mpirank == 0) then 
      print *,'I/O time  = ', iotime 
     endif 

! Initialize blacs processor grid 

     call tick(clock) 
     call blacs_pinfo (me,procs) 

     call blacs_get  (-1, 0, icontxt) 
     call blacs_gridinit(icontxt, 'R', prow, pcol) 
     call blacs_gridinfo(icontxt, prow, pcol, myrow, mycol) 

     myArows = numroc(n, nb, myrow, 0, prow) 
     myAcols = numroc(n, nb, mycol, 0, pcol) 

! Construct local arrays 
! Global structure: matrix A of n rows and n columns 

! Prepare array descriptors for ScaLAPACK 
     call descinit(ides_a, n, n, nb, nb, 0, 0, icontxt, myArows, info) 
     call descinit(ides_z, n, n, nb, nb, 0, 0, icontxt, myArows, info) 

! Call ScaLAPACK library routine 

     allocate(work(1), iwork(1)) 
     iwork(1) = -1 
     work(1) = -1. 
     call pssyevr('V', 'A', 'U', n, myA, 1, 1, ides_a, -1.e20, +1.e20, 1, n, & 
        m, nz, eigenvals, myZ, 1, 1, ides_z, work, -1,   & 
        iwork, -1, info) 
     worksize = int(work(1))/2*3 
     iworksize = iwork(1)/2*3 
     print *, 'Local workspace ', worksize 
     call MPI_Reduce(worksize, totwork, 1, MPI_INTEGER, MPI_SUM, 0, MPI_COMM_WORLD, ierr) 
     if (mpirank == 0) print *, ' total work space ', totwork 
     call MPI_Reduce(iworksize, totwork, 1, MPI_INTEGER, MPI_SUM, 0, MPI_COMM_WORLD, ierr) 
     if (mpirank == 0) print *, ' total iwork space ', totwork 
     deallocate(work,iwork) 
     allocate(work(worksize),iwork(iworksize)) 
     call pssyev('N','U',n,myA,1,1,ides_a,eigenvals,myZ,1,1,ides_z,work,worksize,info) 
     if (info /= 0) then 
     print *, 'Error: info = ', info 
     else if (mpirank == 0) then 
     print *, 'Calculated ', m, ' eigenvalues and ', nz, ' eigenvectors.' 
     endif 

! Deallocate the local arrays 

     deallocate(myA, myZ) 
     deallocate(work, iwork) 

! End blacs for processors that are used 

     call blacs_gridexit(icontxt) 
     calctime = tock(clock) 

! calculated the expected eigenvalues for a particular test matrix 

     p = 3. + sqrt((4. * n - 3.) * (n - 1.)*3./(n+1.)) 
     expected(1) = p/(n*(5.-2.*n)) 
     expected(2) = 6./(p*(n + 1.)) 
     expected(3:n) = 1. 

! Print results 

     if (me == 0) then 
     if (info /= 0) then 
      print *, 'Error -- info = ', info 
     endif 
     print *,'Eigenvalues L_infty err = ', & 
      maxval(abs(eigenvals-expected)) 
     print *,'Compute time = ', calctime 
     endif 

     deallocate(eigenvals, expected) 

     call MPI_Finalize(ierr) 


contains 
    subroutine tick(t) 
     integer, intent(OUT) :: t 

     call system_clock(t) 
    end subroutine tick 

    ! returns time in seconds from now to time described by t 
    real function tock(t) 
     integer, intent(in) :: t 
     integer :: now, clock_rate 

     call system_clock(now,clock_rate) 

     tock = real(now - t)/real(clock_rate) 
    end function tock 

end program darray

स्रोत

2012-04-27 13:08:52

सच आपकी प्रतिक्रिया की सराहना करते हैं। मैंने पिछले दौर में व्यापक googling हाहा के बाद इस का एक अच्छा हिस्सा लगाया। एमपीआई में निर्मित चक्रीय वितरण बहुत आसान है !! –

एमपीआई आईओ रीडिंग और राइटिंग ब्लॉक साइक्लिक मैट्रिक्स

उत्तर

संबंधित मुद्दे