Version that works (produces correct results).

SF
psblas-caf-xp
sfilippone 9 years ago
parent 3adc9f4775
commit e684fe27fb

@ -77,7 +77,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: debug=.false.
character(len=20) :: name
integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:], buf_rmt_idx(:)[:]
integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:]
integer(psb_ipk_), allocatable :: buf_rmt_rcv_idx(:)[:], buf_rmt_snd_idx(:)[:]
type(event_type), allocatable, save :: snd_done(:)[:]
type(event_type), save :: rcv_done[*]
name='psi_cnv_v2xch'
@ -126,7 +127,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
if (allocated(snd_done)) deallocate(snd_done)
allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*])
allocate(buf_rmt_idx(mxnrcv)[*])
allocate(buf_rmt_rcv_idx(mxnrcv)[*])
allocate(buf_rmt_snd_idx(mxnrcv)[*])
do
if (ip > size(vidx_in)) then
write(psb_err_unit,*) trim(name),': Warning: out of size of input vector '
@ -145,14 +147,20 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
xch_idx%loc_rcv_bnd(ixch+1) = xch_idx%loc_rcv_bnd(ixch) + nerv
xch_idx%loc_snd_bnd(ixch+1) = xch_idx%loc_snd_bnd(ixch) + nesd
img = xch_idx%prcs_xch(ixch) + 1
!Here I am assuming that all the data exchange between two images takes place in one exchange
!Here I am assuming that all the data exchange between two images takes place in one exchange
sync images(img)
buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1)
buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1)
buf_rmt_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1)
!!$ if (img == 2) write(*,*) this_image(),'Send idx to 2:',&
!!$ & xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1)
buf_rmt_rcv_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1)
buf_rmt_snd_idx(1:nerv)[img] = xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1)
event post(snd_done(me)[img])
event wait(snd_done(img))
xch_idx%rmt_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = &
& buf_rmt_idx(1:nerv)
& buf_rmt_rcv_idx(1:nerv)
xch_idx%rmt_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) = &
& buf_rmt_snd_idx(1:nesd)
xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2)
xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2)
ip = ip+nerv+nesd+3

@ -187,9 +187,13 @@ subroutine psi_desc_index(desc,index_in,dep_list,&
i = i + nerv + 1
end do
ihinsz=i
if (if_caf) then
call caf_alltoall(sdsz, rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1,psb_mpi_def_integer,icomm,minfo)
endif
if (minfo /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='caf_alltoall')
call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoall')
goto 9999
end if
@ -293,9 +297,10 @@ subroutine psi_desc_index(desc,index_in,dep_list,&
idxr = idxr + rvsz(proc+1)
end do
call caf_alltoallv(sndbuf, sdsz, bsdindx, rcvbuf, rvsz, brvindx, minfo)
call mpi_alltoallv(sndbuf,sdsz,bsdindx,psb_mpi_ipk_integer,&
& rcvbuf,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
if (minfo /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='caf_alltoallv')
call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoallv')
goto 9999
end if

@ -205,6 +205,7 @@ module psb_desc_mod
integer(psb_ipk_), allocatable :: loc_rcv_bnd(:)
integer(psb_ipk_), allocatable :: loc_snd_bnd(:)
integer(psb_ipk_), allocatable :: rmt_rcv_idx(:)
integer(psb_ipk_), allocatable :: rmt_snd_idx(:)
integer(psb_ipk_), allocatable :: loc_rcv_idx(:)
integer(psb_ipk_), allocatable :: loc_snd_idx(:)
integer(psb_ipk_) :: max_buffer_size=0

@ -41,11 +41,11 @@ RANLIB=ranlib
INSTALL=/usr/bin/install -c
INSTALL_DATA=${INSTALL} -m 644
INSTALL_DIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107
INSTALL_LIBDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/lib
INSTALL_INCLUDEDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/include
INSTALL_DOCSDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/docs
INSTALL_SAMPLESDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/samples
INSTALL_DIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp
INSTALL_LIBDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/lib
INSTALL_INCLUDEDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/include
INSTALL_DOCSDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/docs
INSTALL_SAMPLESDIR=/mnt/gpfs0/home/e802756/NUMERICAL/DEV/8.0.0-caf-xp/samples
# the following is the flag for /bin/cp which shall copy the file only for updating (timestamp based)--on GNU Linux, '-u'
CPUPDFLAG=

@ -51,9 +51,10 @@ program pdgenspmv
! descriptor
type(psb_desc_type) :: desc_a
! dense matrices
type(psb_d_vect_type) :: xv,bv, vtst,bvh
real(psb_dpk_), allocatable :: tst(:), work(:)
type(psb_d_vect_type) :: xv,bv, vtst,bvh, xvh
real(psb_dpk_), allocatable :: tst(:), work(:), temp(:)
real(psb_dpk_), allocatable :: xvc(:)[:], bvc(:)[:]
type(event_type), allocatable :: ready[:]
! blacs parameters
integer(psb_ipk_) :: ictxt, iam, np
! type(psb_d_csre_sparse_mat) :: acsre
@ -62,9 +63,9 @@ program pdgenspmv
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr,nrl,ncl, lwork, nclmx
integer(psb_ipk_) :: ip, img, nxch, p1,p2
integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes, ahnnz
real(psb_dpk_) :: err, eps
real(psb_dpk_) :: err, eps, err2
integer(psb_ipk_) :: times
integer(psb_ipk_), parameter :: iwarm=10
integer(psb_ipk_), parameter :: iwarm=2
! other variables
integer(psb_ipk_) :: info, i
@ -73,7 +74,7 @@ program pdgenspmv
info=psb_success_
call psb_init(ictxt)
call psb_info(ictxt,iam,np)
@ -117,8 +118,8 @@ program pdgenspmv
if (iam == psb_root_) write(psb_out_unit,'(" ")')
!!$ write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'testmat-',idim,'-',np,'-',iam,'.mtx'
!!$ call a%print(fname,head="psb-testing")
!
! Coarrays for experiment
!
@ -126,8 +127,11 @@ program pdgenspmv
ncl = desc_a%get_local_cols()
nclmx = ncl
call psb_amx(ictxt,nclmx)
!!$ write(*,*) iam,'NCLMX',nclmx,ncl
allocate(xvc(nclmx)[*])
allocate(bvc(nclmx)[*])
allocate(ready[*])
allocate(temp(nclmx))
xvc(1:ncl) = done *(/(i,i=1,ncl)/)
bvc(:) = dzero
call xv%set(xvc(1:ncl))
@ -137,7 +141,7 @@ program pdgenspmv
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
end do
! FIXME: cache flush needed here
do i=1,iwarm
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
@ -150,46 +154,81 @@ program pdgenspmv
call psb_barrier(ictxt)
th = psb_wtime() - tt1
call psb_amx(ictxt,th)
if (.true.) then
associate(xchg => desc_a%halo_xch)
! FIXME: cache flush needed here
nxch = size(xchg%prcs_xch)
write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1
do i=1,iwarm
! Sync images
sync images(xchg%prcs_xch+1)
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
write(0,*) this_image(),'Boundaries ',p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2)
xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
associate(xchg => desc_a%halo_xch)
! FIXME: cache flush needed here
nxch = size(xchg%prcs_xch)
!!$ if (this_image()==2) write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1
do i=1,iwarm
temp = 0.d0
xvc(nrl+1:) = 0.d0
! Sync images
!sync images(xchg%prcs_xch+1)
do ip=1,nxch
img = xchg%prcs_xch(ip) + 1
event post(ready[img])
end do
event wait(ready, until_count=nxch)
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
!!$ if (this_image()==2) write(0,*) this_image(),'Boundaries ',&
!!$ & p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2)
!!$ xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
!!$ xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2)
!!$ if (this_image()==2) write(0,*) this_image(),' :x: ',ip,' : ',&
!!$ &xvc(xchg%loc_rcv_idx(p1:p2)),' : ',xv%v%v(xchg%loc_rcv_idx(p1:p2))
end do
!!$ if (this_image()==2) write(0,*) this_image(),' :x: ',&
!!$ &xvc(nrl+1:ncl),' : ',xv%v%v(nrl+1:ncl)
call a%csmv(done,xvc,dzero,bvc,info)
end do
call a%csmv(done,xvc,dzero,bvc,info)
end do
call psb_barrier(ictxt)
tt1 = psb_wtime()
do i=1,times
! Sync images
sync images(xchg%prcs_xch+1)
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
!xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
call psb_barrier(ictxt)
tt1 = psb_wtime()
do i=1,times
! Sync images
! sync images(xchg%prcs_xch+1)
!!$ do ip=1,nxch
!!$ img = xchg%prcs_xch(ip) + 1
!!$ event post(ready[img])
!!$ end do
!!$ event wait(ready, until_count=nxch)
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
sync images (img)
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
!xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2)
end do
call a%csmv(done,xvc,dzero,bvc,info)
end do
call a%csmv(done,xvc,dzero,bvc,info)
end do
call psb_barrier(ictxt)
tt2 = psb_wtime() - tt1
call psb_amx(ictxt,tt2)
end associate
call bvh%set(bvc(1:ncl))
call psb_barrier(ictxt)
tt2 = psb_wtime() - tt1
call psb_amx(ictxt,tt2)
end associate
call bvh%set(bvc(1:ncl))
call xvh%set(xvc(1:ncl))
endif
!!$ do i=2,2
!!$ sync all
!!$ if (i==this_image()) then
!!$ write(0,*) this_image(),' Xdiff ',xv%v%v(nrl+1:ncl) -xvc(nrl+1:ncl)
!!$ write(0,*) this_image(),' X1 ',xv%v%v(nrl+1:ncl)
!!$ write(0,*) this_image(),' X2 ',xvc(nrl+1:ncl)
!!$ end if
!!$ end do
call psb_geaxpby(-done,bv,done,bvh,desc_a,info)
err = psb_genrm2(bvh,desc_a,info)
call psb_geaxpby(-done,xv,done,xvh,desc_a,info)
err2 = psb_genrm2(xvh,desc_a,info)
nr = desc_a%get_global_rows()
annz = a%get_nzeros()
@ -222,6 +261,7 @@ program pdgenspmv
write(psb_out_unit,'("MFLOPS : ",F20.3)') tflops/1.d6
write(psb_out_unit,'("Difference : ",E20.12)') err
write(psb_out_unit,'("Difference : ",E20.12)') err2
!
! This computation is valid for CSR
@ -234,9 +274,9 @@ program pdgenspmv
write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%get_fmt()
write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
end if
!
! cleanup storage and exit
@ -256,7 +296,7 @@ program pdgenspmv
stop
9999 call psb_error(ictxt)
stop
contains
@ -280,7 +320,7 @@ contains
call psb_bcast(ictxt,afmt)
call psb_bcast(ictxt,idim)
call psb_bcast(ictxt,times)
if (iam == 0) then
write(psb_out_unit,'("Testing matrix : ell1")')
write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim
@ -368,4 +408,4 @@ contains
g = exp(y**2-z**2)
end if
end function g
end program pdgenspmv
end program

@ -1,3 +1,3 @@
CSR
008
80
200
40

@ -1,4 +1,4 @@
8 Number of entries below this
7 Number of entries below this
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES
BJAC Preconditioner NONE DIAG BJAC
CSR Storage format for matrix A: CSR COO JAD
@ -7,5 +7,5 @@ CSR Storage format for matrix A: CSR COO JAD
1000 MAXIT
-1 ITRACE
002 IRST restart for RGMRES and BiCGSTABL
10 NTESTS

Loading…
Cancel
Save