Version that works (produces correct results).

SF
psblas-caf-xp
sfilippone 9 years ago
parent 3adc9f4775
commit e684fe27fb

@ -77,7 +77,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
character(len=20) :: name character(len=20) :: name
integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:], buf_rmt_idx(:)[:] integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:]
integer(psb_ipk_), allocatable :: buf_rmt_rcv_idx(:)[:], buf_rmt_snd_idx(:)[:]
type(event_type), allocatable, save :: snd_done(:)[:] type(event_type), allocatable, save :: snd_done(:)[:]
type(event_type), save :: rcv_done[*] type(event_type), save :: rcv_done[*]
name='psi_cnv_v2xch' name='psi_cnv_v2xch'
@ -126,7 +127,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
if (allocated(snd_done)) deallocate(snd_done) if (allocated(snd_done)) deallocate(snd_done)
allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*]) allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*])
allocate(buf_rmt_idx(mxnrcv)[*]) allocate(buf_rmt_rcv_idx(mxnrcv)[*])
allocate(buf_rmt_snd_idx(mxnrcv)[*])
do do
if (ip > size(vidx_in)) then if (ip > size(vidx_in)) then
write(psb_err_unit,*) trim(name),': Warning: out of size of input vector ' write(psb_err_unit,*) trim(name),': Warning: out of size of input vector '
@ -145,14 +147,20 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
xch_idx%loc_rcv_bnd(ixch+1) = xch_idx%loc_rcv_bnd(ixch) + nerv xch_idx%loc_rcv_bnd(ixch+1) = xch_idx%loc_rcv_bnd(ixch) + nerv
xch_idx%loc_snd_bnd(ixch+1) = xch_idx%loc_snd_bnd(ixch) + nesd xch_idx%loc_snd_bnd(ixch+1) = xch_idx%loc_snd_bnd(ixch) + nesd
img = xch_idx%prcs_xch(ixch) + 1 img = xch_idx%prcs_xch(ixch) + 1
!Here I am assuming that all the data exchange between two images takes place in one exchange !Here I am assuming that all the data exchange between two images takes place in one exchange
sync images(img)
buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1) buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1)
buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1) buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1)
buf_rmt_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) !!$ if (img == 2) write(*,*) this_image(),'Send idx to 2:',&
!!$ & xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1)
buf_rmt_rcv_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1)
buf_rmt_snd_idx(1:nerv)[img] = xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1)
event post(snd_done(me)[img]) event post(snd_done(me)[img])
event wait(snd_done(img)) event wait(snd_done(img))
xch_idx%rmt_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = & xch_idx%rmt_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = &
& buf_rmt_idx(1:nerv) & buf_rmt_rcv_idx(1:nerv)
xch_idx%rmt_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) = &
& buf_rmt_snd_idx(1:nesd)
xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2) xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2)
xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2) xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2)
ip = ip+nerv+nesd+3 ip = ip+nerv+nesd+3

@ -187,9 +187,13 @@ subroutine psi_desc_index(desc,index_in,dep_list,&
i = i + nerv + 1 i = i + nerv + 1
end do end do
ihinsz=i ihinsz=i
if (if_caf) then
call caf_alltoall(sdsz, rvsz,1, minfo) call caf_alltoall(sdsz, rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1,psb_mpi_def_integer,icomm,minfo)
endif
if (minfo /= psb_success_) then if (minfo /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='caf_alltoall') call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoall')
goto 9999 goto 9999
end if end if
@ -293,9 +297,10 @@ subroutine psi_desc_index(desc,index_in,dep_list,&
idxr = idxr + rvsz(proc+1) idxr = idxr + rvsz(proc+1)
end do end do
call caf_alltoallv(sndbuf, sdsz, bsdindx, rcvbuf, rvsz, brvindx, minfo) call mpi_alltoallv(sndbuf,sdsz,bsdindx,psb_mpi_ipk_integer,&
& rcvbuf,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
if (minfo /= psb_success_) then if (minfo /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='caf_alltoallv') call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoallv')
goto 9999 goto 9999
end if end if

@ -205,6 +205,7 @@ module psb_desc_mod
integer(psb_ipk_), allocatable :: loc_rcv_bnd(:) integer(psb_ipk_), allocatable :: loc_rcv_bnd(:)
integer(psb_ipk_), allocatable :: loc_snd_bnd(:) integer(psb_ipk_), allocatable :: loc_snd_bnd(:)
integer(psb_ipk_), allocatable :: rmt_rcv_idx(:) integer(psb_ipk_), allocatable :: rmt_rcv_idx(:)
integer(psb_ipk_), allocatable :: rmt_snd_idx(:)
integer(psb_ipk_), allocatable :: loc_rcv_idx(:) integer(psb_ipk_), allocatable :: loc_rcv_idx(:)
integer(psb_ipk_), allocatable :: loc_snd_idx(:) integer(psb_ipk_), allocatable :: loc_snd_idx(:)
integer(psb_ipk_) :: max_buffer_size=0 integer(psb_ipk_) :: max_buffer_size=0

@ -41,11 +41,11 @@ RANLIB=ranlib
INSTALL=/usr/bin/install -c INSTALL=/usr/bin/install -c
INSTALL_DATA=${INSTALL} -m 644 INSTALL_DATA=${INSTALL} -m 644
INSTALL_DIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107 INSTALL_DIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp
INSTALL_LIBDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/lib INSTALL_LIBDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/lib
INSTALL_INCLUDEDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/include INSTALL_INCLUDEDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/include
INSTALL_DOCSDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/docs INSTALL_DOCSDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/docs
INSTALL_SAMPLESDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/samples INSTALL_SAMPLESDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/samples
# the following is the flag for /bin/cp which shall copy the file only for updating (timestamp based)--on GNU Linux, '-u' # the following is the flag for /bin/cp which shall copy the file only for updating (timestamp based)--on GNU Linux, '-u'
CPUPDFLAG= CPUPDFLAG=

@ -51,9 +51,10 @@ program pdgenspmv
! descriptor ! descriptor
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
! dense matrices ! dense matrices
type(psb_d_vect_type) :: xv,bv, vtst,bvh type(psb_d_vect_type) :: xv,bv, vtst,bvh, xvh
real(psb_dpk_), allocatable :: tst(:), work(:) real(psb_dpk_), allocatable :: tst(:), work(:), temp(:)
real(psb_dpk_), allocatable :: xvc(:)[:], bvc(:)[:] real(psb_dpk_), allocatable :: xvc(:)[:], bvc(:)[:]
type(event_type), allocatable :: ready[:]
! blacs parameters ! blacs parameters
integer(psb_ipk_) :: ictxt, iam, np integer(psb_ipk_) :: ictxt, iam, np
! type(psb_d_csre_sparse_mat) :: acsre ! type(psb_d_csre_sparse_mat) :: acsre
@ -62,9 +63,9 @@ program pdgenspmv
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr,nrl,ncl, lwork, nclmx integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr,nrl,ncl, lwork, nclmx
integer(psb_ipk_) :: ip, img, nxch, p1,p2 integer(psb_ipk_) :: ip, img, nxch, p1,p2
integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes, ahnnz integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes, ahnnz
real(psb_dpk_) :: err, eps real(psb_dpk_) :: err, eps, err2
integer(psb_ipk_) :: times integer(psb_ipk_) :: times
integer(psb_ipk_), parameter :: iwarm=10 integer(psb_ipk_), parameter :: iwarm=2
! other variables ! other variables
integer(psb_ipk_) :: info, i integer(psb_ipk_) :: info, i
@ -73,7 +74,7 @@ program pdgenspmv
info=psb_success_ info=psb_success_
call psb_init(ictxt) call psb_init(ictxt)
call psb_info(ictxt,iam,np) call psb_info(ictxt,iam,np)
@ -117,8 +118,8 @@ program pdgenspmv
if (iam == psb_root_) write(psb_out_unit,'(" ")') if (iam == psb_root_) write(psb_out_unit,'(" ")')
!!$ write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'testmat-',idim,'-',np,'-',iam,'.mtx' !!$ write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'testmat-',idim,'-',np,'-',iam,'.mtx'
!!$ call a%print(fname,head="psb-testing") !!$ call a%print(fname,head="psb-testing")
! !
! Coarrays for experiment ! Coarrays for experiment
! !
@ -126,8 +127,11 @@ program pdgenspmv
ncl = desc_a%get_local_cols() ncl = desc_a%get_local_cols()
nclmx = ncl nclmx = ncl
call psb_amx(ictxt,nclmx) call psb_amx(ictxt,nclmx)
!!$ write(*,*) iam,'NCLMX',nclmx,ncl
allocate(xvc(nclmx)[*]) allocate(xvc(nclmx)[*])
allocate(bvc(nclmx)[*]) allocate(bvc(nclmx)[*])
allocate(ready[*])
allocate(temp(nclmx))
xvc(1:ncl) = done *(/(i,i=1,ncl)/) xvc(1:ncl) = done *(/(i,i=1,ncl)/)
bvc(:) = dzero bvc(:) = dzero
call xv%set(xvc(1:ncl)) call xv%set(xvc(1:ncl))
@ -137,7 +141,7 @@ program pdgenspmv
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n') call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
end do end do
! FIXME: cache flush needed here ! FIXME: cache flush needed here
do i=1,iwarm do i=1,iwarm
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n') call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
@ -150,46 +154,81 @@ program pdgenspmv
call psb_barrier(ictxt) call psb_barrier(ictxt)
th = psb_wtime() - tt1 th = psb_wtime() - tt1
call psb_amx(ictxt,th) call psb_amx(ictxt,th)
if (.true.) then if (.true.) then
associate(xchg => desc_a%halo_xch) associate(xchg => desc_a%halo_xch)
! FIXME: cache flush needed here ! FIXME: cache flush needed here
nxch = size(xchg%prcs_xch) nxch = size(xchg%prcs_xch)
write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1 !!$ if (this_image()==2) write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1
do i=1,iwarm do i=1,iwarm
! Sync images temp = 0.d0
sync images(xchg%prcs_xch+1) xvc(nrl+1:) = 0.d0
do ip = 1, nxch ! Sync images
img = xchg%prcs_xch(ip) + 1 !sync images(xchg%prcs_xch+1)
p1 = xchg%loc_rcv_bnd(ip) do ip=1,nxch
p2 = xchg%loc_rcv_bnd(ip+1)-1 img = xchg%prcs_xch(ip) + 1
write(0,*) this_image(),'Boundaries ',p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2) event post(ready[img])
xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] end do
event wait(ready, until_count=nxch)
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
!!$ if (this_image()==2) write(0,*) this_image(),'Boundaries ',&
!!$ & p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2)
!!$ xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
!!$ xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2)
!!$ if (this_image()==2) write(0,*) this_image(),' :x: ',ip,' : ',&
!!$ &xvc(xchg%loc_rcv_idx(p1:p2)),' : ',xv%v%v(xchg%loc_rcv_idx(p1:p2))
end do
!!$ if (this_image()==2) write(0,*) this_image(),' :x: ',&
!!$ &xvc(nrl+1:ncl),' : ',xv%v%v(nrl+1:ncl)
call a%csmv(done,xvc,dzero,bvc,info)
end do end do
call a%csmv(done,xvc,dzero,bvc,info) call psb_barrier(ictxt)
end do tt1 = psb_wtime()
call psb_barrier(ictxt) do i=1,times
tt1 = psb_wtime() ! Sync images
do i=1,times ! sync images(xchg%prcs_xch+1)
! Sync images !!$ do ip=1,nxch
sync images(xchg%prcs_xch+1) !!$ img = xchg%prcs_xch(ip) + 1
do ip = 1, nxch !!$ event post(ready[img])
img = xchg%prcs_xch(ip) + 1 !!$ end do
p1 = xchg%loc_rcv_bnd(ip) !!$ event wait(ready, until_count=nxch)
p2 = xchg%loc_rcv_bnd(ip+1)-1
!xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
sync images (img)
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
!xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2)
end do
call a%csmv(done,xvc,dzero,bvc,info)
end do end do
call a%csmv(done,xvc,dzero,bvc,info) call psb_barrier(ictxt)
end do tt2 = psb_wtime() - tt1
call psb_barrier(ictxt) call psb_amx(ictxt,tt2)
tt2 = psb_wtime() - tt1 end associate
call psb_amx(ictxt,tt2) call bvh%set(bvc(1:ncl))
end associate call xvh%set(xvc(1:ncl))
call bvh%set(bvc(1:ncl))
endif endif
!!$ do i=2,2
!!$ sync all
!!$ if (i==this_image()) then
!!$ write(0,*) this_image(),' Xdiff ',xv%v%v(nrl+1:ncl) -xvc(nrl+1:ncl)
!!$ write(0,*) this_image(),' X1 ',xv%v%v(nrl+1:ncl)
!!$ write(0,*) this_image(),' X2 ',xvc(nrl+1:ncl)
!!$ end if
!!$ end do
call psb_geaxpby(-done,bv,done,bvh,desc_a,info) call psb_geaxpby(-done,bv,done,bvh,desc_a,info)
err = psb_genrm2(bvh,desc_a,info) err = psb_genrm2(bvh,desc_a,info)
call psb_geaxpby(-done,xv,done,xvh,desc_a,info)
err2 = psb_genrm2(xvh,desc_a,info)
nr = desc_a%get_global_rows() nr = desc_a%get_global_rows()
annz = a%get_nzeros() annz = a%get_nzeros()
@ -222,6 +261,7 @@ program pdgenspmv
write(psb_out_unit,'("MFLOPS : ",F20.3)') tflops/1.d6 write(psb_out_unit,'("MFLOPS : ",F20.3)') tflops/1.d6
write(psb_out_unit,'("Difference : ",E20.12)') err write(psb_out_unit,'("Difference : ",E20.12)') err
write(psb_out_unit,'("Difference : ",E20.12)') err2
! !
! This computation is valid for CSR ! This computation is valid for CSR
@ -234,9 +274,9 @@ program pdgenspmv
write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%get_fmt() write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%get_fmt()
write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
end if end if
! !
! cleanup storage and exit ! cleanup storage and exit
@ -256,7 +296,7 @@ program pdgenspmv
stop stop
9999 call psb_error(ictxt) 9999 call psb_error(ictxt)
stop stop
contains contains
@ -280,7 +320,7 @@ contains
call psb_bcast(ictxt,afmt) call psb_bcast(ictxt,afmt)
call psb_bcast(ictxt,idim) call psb_bcast(ictxt,idim)
call psb_bcast(ictxt,times) call psb_bcast(ictxt,times)
if (iam == 0) then if (iam == 0) then
write(psb_out_unit,'("Testing matrix : ell1")') write(psb_out_unit,'("Testing matrix : ell1")')
write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim
@ -368,4 +408,4 @@ contains
g = exp(y**2-z**2) g = exp(y**2-z**2)
end if end if
end function g end function g
end program pdgenspmv end program

@ -1,3 +1,3 @@
CSR CSR
008 200
80 40

@ -1,4 +1,4 @@
8 Number of entries below this 7 Number of entries below this
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES
BJAC Preconditioner NONE DIAG BJAC BJAC Preconditioner NONE DIAG BJAC
CSR Storage format for matrix A: CSR COO JAD CSR Storage format for matrix A: CSR COO JAD
@ -7,5 +7,5 @@ CSR Storage format for matrix A: CSR COO JAD
1000 MAXIT 1000 MAXIT
-1 ITRACE -1 ITRACE
002 IRST restart for RGMRES and BiCGSTABL 002 IRST restart for RGMRES and BiCGSTABL
10 NTESTS

Loading…
Cancel
Save