diff --git a/base/internals/psi_desc_impl.f90 b/base/internals/psi_desc_impl.f90 index 59377a08..deaf2a67 100644 --- a/base/internals/psi_desc_impl.f90 +++ b/base/internals/psi_desc_impl.f90 @@ -77,7 +77,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. character(len=20) :: name - integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:], buf_rmt_idx(:)[:] + integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:] + integer(psb_ipk_), allocatable :: buf_rmt_rcv_idx(:)[:], buf_rmt_snd_idx(:)[:] type(event_type), allocatable, save :: snd_done(:)[:] type(event_type), save :: rcv_done[*] name='psi_cnv_v2xch' @@ -126,7 +127,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) if (allocated(snd_done)) deallocate(snd_done) allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*]) - allocate(buf_rmt_idx(mxnrcv)[*]) + allocate(buf_rmt_rcv_idx(mxnrcv)[*]) + allocate(buf_rmt_snd_idx(mxnrcv)[*]) do if (ip > size(vidx_in)) then write(psb_err_unit,*) trim(name),': Warning: out of size of input vector ' @@ -145,14 +147,20 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) xch_idx%loc_rcv_bnd(ixch+1) = xch_idx%loc_rcv_bnd(ixch) + nerv xch_idx%loc_snd_bnd(ixch+1) = xch_idx%loc_snd_bnd(ixch) + nesd img = xch_idx%prcs_xch(ixch) + 1 -!Here I am assuming that all the data exchange between two images takes place in one exchange + !Here I am assuming that all the data exchange between two images takes place in one exchange + sync images(img) buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1) buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1) - buf_rmt_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) +!!$ if (img == 2) write(*,*) this_image(),'Send idx to 2:',& +!!$ & xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) + buf_rmt_rcv_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) + buf_rmt_snd_idx(1:nerv)[img] = xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) event post(snd_done(me)[img]) event wait(snd_done(img)) xch_idx%rmt_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = & - & buf_rmt_idx(1:nerv) + & buf_rmt_rcv_idx(1:nerv) + xch_idx%rmt_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) = & + & buf_rmt_snd_idx(1:nesd) xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2) xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2) ip = ip+nerv+nesd+3 diff --git a/base/internals/psi_desc_index.F90 b/base/internals/psi_desc_index.F90 index d7c22b9a..422a6868 100644 --- a/base/internals/psi_desc_index.F90 +++ b/base/internals/psi_desc_index.F90 @@ -187,9 +187,13 @@ subroutine psi_desc_index(desc,index_in,dep_list,& i = i + nerv + 1 end do ihinsz=i + if (if_caf) then call caf_alltoall(sdsz, rvsz,1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1,psb_mpi_def_integer,icomm,minfo) + endif if (minfo /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='caf_alltoall') + call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoall') goto 9999 end if @@ -293,9 +297,10 @@ subroutine psi_desc_index(desc,index_in,dep_list,& idxr = idxr + rvsz(proc+1) end do - call caf_alltoallv(sndbuf, sdsz, bsdindx, rcvbuf, rvsz, brvindx, minfo) + call mpi_alltoallv(sndbuf,sdsz,bsdindx,psb_mpi_ipk_integer,& + & rcvbuf,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo) if (minfo /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='caf_alltoallv') + call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoallv') goto 9999 end if diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90 index acb39d0d..0185b0fe 100644 --- a/base/modules/desc/psb_desc_mod.F90 +++ b/base/modules/desc/psb_desc_mod.F90 @@ -205,6 +205,7 @@ module psb_desc_mod integer(psb_ipk_), allocatable :: loc_rcv_bnd(:) integer(psb_ipk_), allocatable :: loc_snd_bnd(:) integer(psb_ipk_), allocatable :: rmt_rcv_idx(:) + integer(psb_ipk_), allocatable :: rmt_snd_idx(:) integer(psb_ipk_), allocatable :: loc_rcv_idx(:) integer(psb_ipk_), allocatable :: loc_snd_idx(:) integer(psb_ipk_) :: max_buffer_size=0 diff --git a/include/Make.inc.psblas b/include/Make.inc.psblas index 2287943c..47ed05de 100644 --- a/include/Make.inc.psblas +++ b/include/Make.inc.psblas @@ -41,11 +41,11 @@ RANLIB=ranlib INSTALL=/usr/bin/install -c INSTALL_DATA=${INSTALL} -m 644 -INSTALL_DIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107 -INSTALL_LIBDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/lib -INSTALL_INCLUDEDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/include -INSTALL_DOCSDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/docs -INSTALL_SAMPLESDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/samples +INSTALL_DIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp +INSTALL_LIBDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/lib +INSTALL_INCLUDEDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/include +INSTALL_DOCSDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/docs +INSTALL_SAMPLESDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/samples # the following is the flag for /bin/cp which shall copy the file only for updating (timestamp based)--on GNU Linux, '-u' CPUPDFLAG= diff --git a/test/kernel/pdgenspmv.f90 b/test/kernel/pdgenspmv.f90 index 98e4815b..027d0970 100644 --- a/test/kernel/pdgenspmv.f90 +++ b/test/kernel/pdgenspmv.f90 @@ -51,9 +51,10 @@ program pdgenspmv ! descriptor type(psb_desc_type) :: desc_a ! dense matrices - type(psb_d_vect_type) :: xv,bv, vtst,bvh - real(psb_dpk_), allocatable :: tst(:), work(:) + type(psb_d_vect_type) :: xv,bv, vtst,bvh, xvh + real(psb_dpk_), allocatable :: tst(:), work(:), temp(:) real(psb_dpk_), allocatable :: xvc(:)[:], bvc(:)[:] + type(event_type), allocatable :: ready[:] ! blacs parameters integer(psb_ipk_) :: ictxt, iam, np ! type(psb_d_csre_sparse_mat) :: acsre @@ -62,9 +63,9 @@ program pdgenspmv integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr,nrl,ncl, lwork, nclmx integer(psb_ipk_) :: ip, img, nxch, p1,p2 integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes, ahnnz - real(psb_dpk_) :: err, eps + real(psb_dpk_) :: err, eps, err2 integer(psb_ipk_) :: times - integer(psb_ipk_), parameter :: iwarm=10 + integer(psb_ipk_), parameter :: iwarm=2 ! other variables integer(psb_ipk_) :: info, i @@ -73,7 +74,7 @@ program pdgenspmv info=psb_success_ - + call psb_init(ictxt) call psb_info(ictxt,iam,np) @@ -117,8 +118,8 @@ program pdgenspmv if (iam == psb_root_) write(psb_out_unit,'(" ")') !!$ write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'testmat-',idim,'-',np,'-',iam,'.mtx' !!$ call a%print(fname,head="psb-testing") - - + + ! ! Coarrays for experiment ! @@ -126,8 +127,11 @@ program pdgenspmv ncl = desc_a%get_local_cols() nclmx = ncl call psb_amx(ictxt,nclmx) +!!$ write(*,*) iam,'NCLMX',nclmx,ncl allocate(xvc(nclmx)[*]) allocate(bvc(nclmx)[*]) + allocate(ready[*]) + allocate(temp(nclmx)) xvc(1:ncl) = done *(/(i,i=1,ncl)/) bvc(:) = dzero call xv%set(xvc(1:ncl)) @@ -137,7 +141,7 @@ program pdgenspmv call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n') end do - + ! FIXME: cache flush needed here do i=1,iwarm call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n') @@ -150,46 +154,81 @@ program pdgenspmv call psb_barrier(ictxt) th = psb_wtime() - tt1 call psb_amx(ictxt,th) - + if (.true.) then - associate(xchg => desc_a%halo_xch) - ! FIXME: cache flush needed here - nxch = size(xchg%prcs_xch) - write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1 - do i=1,iwarm - ! Sync images - sync images(xchg%prcs_xch+1) - do ip = 1, nxch - img = xchg%prcs_xch(ip) + 1 - p1 = xchg%loc_rcv_bnd(ip) - p2 = xchg%loc_rcv_bnd(ip+1)-1 - write(0,*) this_image(),'Boundaries ',p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2) - xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + associate(xchg => desc_a%halo_xch) + ! FIXME: cache flush needed here + nxch = size(xchg%prcs_xch) +!!$ if (this_image()==2) write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1 + do i=1,iwarm + temp = 0.d0 + xvc(nrl+1:) = 0.d0 + ! Sync images + !sync images(xchg%prcs_xch+1) + do ip=1,nxch + img = xchg%prcs_xch(ip) + 1 + event post(ready[img]) + end do + event wait(ready, until_count=nxch) + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 +!!$ if (this_image()==2) write(0,*) this_image(),'Boundaries ',& +!!$ & p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2) +!!$ xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + + temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] +!!$ xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2) +!!$ if (this_image()==2) write(0,*) this_image(),' :x: ',ip,' : ',& +!!$ &xvc(xchg%loc_rcv_idx(p1:p2)),' : ',xv%v%v(xchg%loc_rcv_idx(p1:p2)) + end do +!!$ if (this_image()==2) write(0,*) this_image(),' :x: ',& +!!$ &xvc(nrl+1:ncl),' : ',xv%v%v(nrl+1:ncl) + call a%csmv(done,xvc,dzero,bvc,info) end do - call a%csmv(done,xvc,dzero,bvc,info) - end do - call psb_barrier(ictxt) - tt1 = psb_wtime() - do i=1,times - ! Sync images - sync images(xchg%prcs_xch+1) - do ip = 1, nxch - img = xchg%prcs_xch(ip) + 1 - p1 = xchg%loc_rcv_bnd(ip) - p2 = xchg%loc_rcv_bnd(ip+1)-1 - !xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + call psb_barrier(ictxt) + tt1 = psb_wtime() + do i=1,times + ! Sync images +! sync images(xchg%prcs_xch+1) +!!$ do ip=1,nxch +!!$ img = xchg%prcs_xch(ip) + 1 +!!$ event post(ready[img]) +!!$ end do +!!$ event wait(ready, until_count=nxch) + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + sync images (img) + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + !xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2) + end do + call a%csmv(done,xvc,dzero,bvc,info) end do - call a%csmv(done,xvc,dzero,bvc,info) - end do - call psb_barrier(ictxt) - tt2 = psb_wtime() - tt1 - call psb_amx(ictxt,tt2) - end associate - call bvh%set(bvc(1:ncl)) + call psb_barrier(ictxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ictxt,tt2) + end associate + call bvh%set(bvc(1:ncl)) + call xvh%set(xvc(1:ncl)) endif +!!$ do i=2,2 +!!$ sync all +!!$ if (i==this_image()) then +!!$ write(0,*) this_image(),' Xdiff ',xv%v%v(nrl+1:ncl) -xvc(nrl+1:ncl) +!!$ write(0,*) this_image(),' X1 ',xv%v%v(nrl+1:ncl) +!!$ write(0,*) this_image(),' X2 ',xvc(nrl+1:ncl) +!!$ end if +!!$ end do call psb_geaxpby(-done,bv,done,bvh,desc_a,info) err = psb_genrm2(bvh,desc_a,info) + call psb_geaxpby(-done,xv,done,xvh,desc_a,info) + err2 = psb_genrm2(xvh,desc_a,info) nr = desc_a%get_global_rows() annz = a%get_nzeros() @@ -222,6 +261,7 @@ program pdgenspmv write(psb_out_unit,'("MFLOPS : ",F20.3)') tflops/1.d6 write(psb_out_unit,'("Difference : ",E20.12)') err + write(psb_out_unit,'("Difference : ",E20.12)') err2 ! ! This computation is valid for CSR @@ -234,9 +274,9 @@ program pdgenspmv write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%get_fmt() write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize - + end if - + ! ! cleanup storage and exit @@ -256,7 +296,7 @@ program pdgenspmv stop 9999 call psb_error(ictxt) - + stop contains @@ -280,7 +320,7 @@ contains call psb_bcast(ictxt,afmt) call psb_bcast(ictxt,idim) call psb_bcast(ictxt,times) - + if (iam == 0) then write(psb_out_unit,'("Testing matrix : ell1")') write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim @@ -368,4 +408,4 @@ contains g = exp(y**2-z**2) end if end function g -end program pdgenspmv +end program diff --git a/test/kernel/runs/spmv.inp b/test/kernel/runs/spmv.inp index 3e4c5f79..e0656746 100644 --- a/test/kernel/runs/spmv.inp +++ b/test/kernel/runs/spmv.inp @@ -1,3 +1,3 @@ CSR -008 -80 +200 +40 \ No newline at end of file diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index ae24cfca..5f9e7667 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -1,4 +1,4 @@ -8 Number of entries below this +7 Number of entries below this BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO JAD @@ -7,5 +7,5 @@ CSR Storage format for matrix A: CSR COO JAD 1000 MAXIT -1 ITRACE 002 IRST restart for RGMRES and BiCGSTABL -10 NTESTS +