Experiment with remote indirect access.

psblas-caf-xp
sfilippone 9 years ago
parent 5295c552ff
commit 3adc9f4775

@ -72,12 +72,12 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
! ....local scalars.... ! ....local scalars....
integer(psb_ipk_) :: np, me, img integer(psb_ipk_) :: np, me, img
integer(psb_ipk_) :: err_act integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: nxch, nsnd, nrcv, nesd,nerv, ip, j, k, ixch integer(psb_ipk_) :: nxch, nsnd, nrcv, nesd,nerv, ip, j, k, ixch,mxnrcv
! ...parameters ! ...parameters
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
character(len=20) :: name character(len=20) :: name
integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:] integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:], buf_rmt_idx(:)[:]
type(event_type), allocatable, save :: snd_done(:)[:] type(event_type), allocatable, save :: snd_done(:)[:]
type(event_type), save :: rcv_done[*] type(event_type), save :: rcv_done[*]
name='psi_cnv_v2xch' name='psi_cnv_v2xch'
@ -100,6 +100,7 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
call psb_get_xch_idx(vidx_in, nxch, nsnd, nrcv) call psb_get_xch_idx(vidx_in, nxch, nsnd, nrcv)
xch_idx%max_buffer_size = max(nsnd,nrcv) xch_idx%max_buffer_size = max(nsnd,nrcv)
call psb_amx(ictxt,xch_idx%max_buffer_size) call psb_amx(ictxt,xch_idx%max_buffer_size)
mxnrcv = xch_idx%max_buffer_size
if (info == 0) call psb_realloc(nxch,xch_idx%prcs_xch,info) if (info == 0) call psb_realloc(nxch,xch_idx%prcs_xch,info)
if (info == 0) call psb_realloc(nxch,2,xch_idx%rmt_snd_bnd,info) if (info == 0) call psb_realloc(nxch,2,xch_idx%rmt_snd_bnd,info)
if (info == 0) call psb_realloc(nxch,2,xch_idx%rmt_rcv_bnd,info) if (info == 0) call psb_realloc(nxch,2,xch_idx%rmt_rcv_bnd,info)
@ -107,6 +108,7 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
if (info == 0) call psb_realloc(nxch+1,xch_idx%loc_rcv_bnd,info) if (info == 0) call psb_realloc(nxch+1,xch_idx%loc_rcv_bnd,info)
if (info == 0) call psb_realloc(nsnd,xch_idx%loc_snd_idx,info) if (info == 0) call psb_realloc(nsnd,xch_idx%loc_snd_idx,info)
if (info == 0) call psb_realloc(nrcv,xch_idx%loc_rcv_idx,info) if (info == 0) call psb_realloc(nrcv,xch_idx%loc_rcv_idx,info)
if (info == 0) call psb_realloc(nrcv,xch_idx%rmt_rcv_idx,info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
@ -119,10 +121,12 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
ixch = 1 ixch = 1
xch_idx%loc_snd_bnd(1) = 1 xch_idx%loc_snd_bnd(1) = 1
xch_idx%loc_rcv_bnd(1) = 1 xch_idx%loc_rcv_bnd(1) = 1
if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd) if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd)
if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd) if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd)
if (allocated(snd_done)) deallocate(snd_done) if (allocated(snd_done)) deallocate(snd_done)
allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*])
allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*])
allocate(buf_rmt_idx(mxnrcv)[*])
do do
if (ip > size(vidx_in)) then if (ip > size(vidx_in)) then
write(psb_err_unit,*) trim(name),': Warning: out of size of input vector ' write(psb_err_unit,*) trim(name),': Warning: out of size of input vector '
@ -142,21 +146,24 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
xch_idx%loc_snd_bnd(ixch+1) = xch_idx%loc_snd_bnd(ixch) + nesd xch_idx%loc_snd_bnd(ixch+1) = xch_idx%loc_snd_bnd(ixch) + nesd
img = xch_idx%prcs_xch(ixch) + 1 img = xch_idx%prcs_xch(ixch) + 1
!Here I am assuming that all the data exchange between two images takes place in one exchange !Here I am assuming that all the data exchange between two images takes place in one exchange
buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1) buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1)
buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1) buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1)
event post(snd_done(me)[img]) buf_rmt_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1)
event wait(snd_done(img)) event post(snd_done(me)[img])
xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2) event wait(snd_done(img))
xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2) xch_idx%rmt_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = &
& buf_rmt_idx(1:nerv)
xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2)
xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2)
ip = ip+nerv+nesd+3 ip = ip+nerv+nesd+3
ixch = ixch + 1 ixch = ixch + 1
end do end do
xch_idx%rmt_rcv_bnd(:,2) = xch_idx%rmt_rcv_bnd(:,2) - 1 xch_idx%rmt_rcv_bnd(:,2) = xch_idx%rmt_rcv_bnd(:,2) - 1
xch_idx%rmt_snd_bnd(:,2) = xch_idx%rmt_snd_bnd(:,2) - 1 xch_idx%rmt_snd_bnd(:,2) = xch_idx%rmt_snd_bnd(:,2) - 1
if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd) if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd)
if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd) if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd)
if (allocated(snd_done)) deallocate(snd_done) if (allocated(snd_done)) deallocate(snd_done)
sync all !sync all
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

@ -163,7 +163,7 @@ subroutine psi_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,&
i=1 i=1
if (debug_level >= psb_debug_inner_)& if (debug_level >= psb_debug_inner_)&
& write(debug_unit,*) me,' ',trim(name),': start ',info & write(debug_unit,*) me,' ',trim(name),': start ',info
!!$ write(0,*) 'extract_dep_list ',me,npr
pointer_dep_list=1 pointer_dep_list=1
if (is_bld) then if (is_bld) then
do while (desc_str(i) /= -1) do while (desc_str(i) /= -1)
@ -272,7 +272,12 @@ subroutine psi_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,&
itmp(1:dl_lda) = dep_list(1:dl_lda,me) itmp(1:dl_lda) = dep_list(1:dl_lda,me)
dl_mpi = dl_lda dl_mpi = dl_lda
call caf_allgather(itmp,dl_mpi, dep_list, minfo) if (.false.) then
call caf_allgather(itmp, dl_mpi, dep_list, minfo)
else
call mpi_allgather(itmp,dl_mpi,psb_mpi_ipk_integer,&
& dep_list,dl_mpi,psb_mpi_ipk_integer,icomm,minfo)
end if
if (info == 0) deallocate(itmp,stat=info) if (info == 0) deallocate(itmp,stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_alloc_dealloc_ info=psb_err_alloc_dealloc_

@ -155,7 +155,7 @@ psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o ps
psb_base_mod.o: $(MODULES) psb_base_mod.o: $(MODULES)
psb_caf_mod.o: psb_caf_mod.f90 psb_const_mod.o psb_caf_mod.o: psb_caf_mod.f90 psb_const_mod.o psb_realloc_mod.o
$(FC) $(FINCLUDES) $(FDEFINES) $(F90COPT) $(EXTRA_OPT) -c $< -o $@ $(FC) $(FINCLUDES) $(FDEFINES) $(F90COPT) $(EXTRA_OPT) -c $< -o $@
psi_penv_mod.o: psi_penv_mod.F90 $(BASIC_MODS) psi_penv_mod.o: psi_penv_mod.F90 $(BASIC_MODS)

@ -204,6 +204,7 @@ module psb_desc_mod
integer(psb_ipk_), allocatable :: rmt_rcv_bnd(:,:) integer(psb_ipk_), allocatable :: rmt_rcv_bnd(:,:)
integer(psb_ipk_), allocatable :: loc_rcv_bnd(:) integer(psb_ipk_), allocatable :: loc_rcv_bnd(:)
integer(psb_ipk_), allocatable :: loc_snd_bnd(:) integer(psb_ipk_), allocatable :: loc_snd_bnd(:)
integer(psb_ipk_), allocatable :: rmt_rcv_idx(:)
integer(psb_ipk_), allocatable :: loc_rcv_idx(:) integer(psb_ipk_), allocatable :: loc_rcv_idx(:)
integer(psb_ipk_), allocatable :: loc_snd_idx(:) integer(psb_ipk_), allocatable :: loc_snd_idx(:)
integer(psb_ipk_) :: max_buffer_size=0 integer(psb_ipk_) :: max_buffer_size=0

@ -1131,6 +1131,7 @@ t2 = mpi_wtime() - t1
end subroutine caf_zgatherv end subroutine caf_zgatherv
subroutine caf_iallgatherv(snd, scount, rcv, rcount, rdispls, info) subroutine caf_iallgatherv(snd, scount, rcv, rcount, rdispls, info)
use psb_realloc_mod
implicit none implicit none
integer(psb_ipk_), intent(in) :: scount, snd(:), rcount(:), rdispls(:) integer(psb_ipk_), intent(in) :: scount, snd(:), rcount(:), rdispls(:)
integer(psb_ipk_), allocatable, intent(inout) :: rcv(:) integer(psb_ipk_), allocatable, intent(inout) :: rcv(:)
@ -1149,16 +1150,22 @@ t2 = mpi_wtime() - t1
info = -4 info = -4
return return
endif endif
! write(0,*) 'caf_allgatherv',me,np
! call flush(0)
! sync all
if (allocated(snd_buf)) deallocate(snd_buf) if (allocated(snd_buf)) deallocate(snd_buf)
allocate(snd_buf(scount)[*], STAT=info) allocate(snd_buf(scount)[*], STAT=info)
if (info/=0) return if (info/=0)then
write(0,*) me, 'Info on allocate ',info
return
end if
if (allocated(rcv).and.(size(rcv,1) < rdispls(np) + rcount(np))) then if (allocated(rcv).and.(size(rcv,1) < rdispls(np) + rcount(np))) then
call move_alloc(rcv,rcv_tmp) call psb_realloc(rdispls(np)+rcount(np),rcv,info)
allocate(rcv(rdispls(np)+rcount(np))) !!$ call move_alloc(rcv,rcv_tmp)
rcv(1:size(rcv_tmp,1))=rcv_tmp !!$ allocate(rcv(rdispls(np)+rcount(np)))
deallocate(rcv_tmp) !!$ rcv(1:size(rcv_tmp,1))=rcv_tmp
!!$ deallocate(rcv_tmp)
endif endif
snd_buf(1:scount)=snd(1:scount) snd_buf(1:scount)=snd(1:scount)
@ -1365,39 +1372,75 @@ t2 = mpi_wtime() - t1
integer(psb_ipk_), allocatable :: snd_buf(:)[:] integer(psb_ipk_), allocatable :: snd_buf(:)[:]
type(event_type), allocatable :: snd_copied(:)[:] type(event_type), allocatable :: snd_copied(:)[:]
double precision :: t1, t2 double precision :: t1, t2
t1 = mpi_wtime() t1 = mpi_wtime()
np = num_images() np = num_images()
me = this_image() me = this_image()
info = 0 info = 0
if (size(rcv,1) < scount) then if (size(rcv,1) < scount) then
info = -3 info = -3
print*,'error', info, size(rcv,1), scount print*,'error', info, size(rcv,1), scount
return return
endif endif
if (size(rcv,2) < np) then if (size(rcv,2) < np) then
info = -4 info = -4
print*,'error', info, size(rcv,2), np print*,'error', info, size(rcv,2), np
return return
endif endif
if (allocated(snd_buf)) deallocate(snd_buf) ! write(*,*) 'Hello from ',me,' of:', np,size(snd,1)
if (allocated(snd_copied)) deallocate(snd_copied) ! sync all
allocate(snd_buf(size(snd,1))[*]) if (.true.) then
allocate(snd_copied(np)[*]) !!$ if (allocated(snd_buf)) deallocate(snd_buf)
!allocate(snd_buf(size(snd,1))[*]) !!$ if (allocated(snd_copied)) deallocate(snd_copied)
snd_buf=snd allocate(snd_buf(size(snd,1))[*],stat=info)
do img=1,np if (info /= 0) then
event post(snd_copied(me)[img]) write(*,*) 'Error on allocating snd_buf ',me
enddo stop
!sync all end if
do img=1,np allocate(snd_copied(np)[*],stat=info)
event wait(snd_copied(img)) if (info /= 0) then
rcv(:,img)=snd_buf(:)[img] write(*,*) 'Error on allocating snd_copied ',me
enddo stop
if (allocated(snd_buf)) deallocate(snd_buf) end if
if (allocated(snd_copied)) deallocate(snd_copied) !allocate(snd_buf(size(snd,1))[*])
!Not sure this is necessary... snd_buf(:)=snd(:)
sync all ! write(*,*) 'Sending from ',me,':', snd(:)
t2 = mpi_wtime() - t1 do img=1,np
event post(snd_copied(me)[img])
enddo
!sync all
if(.false.) then
do img=1,np
event wait(snd_copied(img))
rcv(:,img)=snd_buf(:)[img]
enddo
else
do img=me+1,np
event wait(snd_copied(img))
rcv(:,img)=snd_buf(:)[img]
enddo
do img=1,me
event wait(snd_copied(img))
rcv(:,img)=snd_buf(:)[img]
enddo
end if
!if (allocated(snd_buf))
deallocate(snd_buf)
!if (allocated(snd_copied))
deallocate(snd_copied)
!Not sure this is necessary...
!sync all
else if(.false.) then
do img=1,np
if (me == img) rcv(:,img) = snd(:)
call co_broadcast(rcv(:,img),img)
end do
else if(.false.) then
rcv(:,:) = 0
rcv(:,me) = snd(:)
call co_sum(rcv)
end if
t2 = mpi_wtime() - t1
end subroutine caf_allgather end subroutine caf_allgather

@ -28,24 +28,24 @@ FLINK=$(MPFC)
LIBS= LIBS=
# BLAS, BLACS and METIS libraries. # BLAS, BLACS and METIS libraries.
BLAS=-lcblas -lf77blas -latlas -L/opt/atlas/3.8.4/gnu/7.1.0/lib BLAS=-lopenblas -L/mnt/gpfs0/home/e802756/builds/openblas/0.2.20
METIS_LIB=-lmetis -L/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/Lib -L/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/lib METIS_LIB=
AMD_LIB= AMD_LIB=
LAPACK=-llapack LAPACK=
EXTRA_COBJS= EXTRA_COBJS=
PSBFDEFINES=-DHAVE_METIS -DHAVE_LAPACK -DHAVE_MOLD -DHAVE_EXTENDS_TYPE_OF -DHAVE_SAME_TYPE_AS -DHAVE_FINAL -DHAVE_ISO_FORTRAN_ENV -DHAVE_FLUSH_STMT -DHAVE_VOLATILE -DMPI_MOD PSBFDEFINES=-DHAVE_LAPACK -DHAVE_MOLD -DHAVE_EXTENDS_TYPE_OF -DHAVE_SAME_TYPE_AS -DHAVE_FINAL -DHAVE_ISO_FORTRAN_ENV -DHAVE_FLUSH_STMT -DHAVE_VOLATILE -DMPI_MOD
PSBCDEFINES=-DHAVE_METIS_ -I/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/include -I/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/Include -DLowerUnderscore -DPtr64Bits PSBCDEFINES=-DLowerUnderscore -DPtr64Bits
AR=ar -cur AR=ar -cur
RANLIB=ranlib RANLIB=ranlib
INSTALL=/usr/bin/install -c INSTALL=/usr/bin/install -c
INSTALL_DATA=${INSTALL} -m 644 INSTALL_DATA=${INSTALL} -m 644
INSTALL_DIR=/opt/psblas/CAF/7.1.0-csr INSTALL_DIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107
INSTALL_LIBDIR=/opt/psblas/CAF/7.1.0-csr/lib INSTALL_LIBDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/lib
INSTALL_INCLUDEDIR=/opt/psblas/CAF/7.1.0-csr/include INSTALL_INCLUDEDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/include
INSTALL_DOCSDIR=/opt/psblas/CAF/7.1.0-csr/docs INSTALL_DOCSDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/docs
INSTALL_SAMPLESDIR=/opt/psblas/CAF/7.1.0-csr/samples INSTALL_SAMPLESDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/samples
# the following is the flag for /bin/cp which shall copy the file only for updating (timestamp based)--on GNU Linux, '-u' # the following is the flag for /bin/cp which shall copy the file only for updating (timestamp based)--on GNU Linux, '-u'
CPUPDFLAG= CPUPDFLAG=

@ -28,8 +28,7 @@ pingpong: pingpong.o
clean: clean:
/bin/rm -f hello.o pingpong.o /bin/rm -f hello.o pingpong.o $(EXEDIR)/hello
$(EXEDIR)/hello
verycleanlib: verycleanlib:
(cd ../..; make veryclean) (cd ../..; make veryclean)
lib: lib:

@ -1,21 +1,22 @@
program hello program hello
use psb_base_mod use iso_fortran_env
implicit none implicit none
integer iam, np, icontxt, ip, jp, idummy integer me, np, icontxt, ip, jp, idummy
integer :: snd_buf(4)[*]
type(event_type), allocatable :: snd_copied(:)[:]
call psb_init(icontxt)
call psb_info(icontxt,iam,np) me = this_image()
! have all processes check in np = num_images()
if ((iam >= 0).and.(iam < np)) then
if (iam == 0) then write(*,*) 'Hello from ',me,' of:', np
do ip = 1, np-1 snd_buf(1:4) = me*(/1,2,3,4/)
call psb_rcv(icontxt,idummy,ip)
enddo sync all
write(*,*) 'Hello, world: all ',np, & if (me == 1) then
& ' processes checked in!' do ip=1,np
else write(*,*) 'From ',ip,' :',snd_buf(:)[ip]
call psb_snd(icontxt,idummy,0) end do
endif
end if end if
call psb_exit(icontxt)
end program hello end program hello

@ -47,20 +47,24 @@ program pdgenspmv
! sparse matrix and preconditioner ! sparse matrix and preconditioner
type(psb_dspmat_type) :: a, ad, ah, ah1 type(psb_dspmat_type) :: a, ad, ah, ah1
type(psb_d_csr_sparse_mat) :: acsr
! descriptor ! descriptor
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
! dense matrices ! dense matrices
type(psb_d_vect_type) :: xv,bv, vtst type(psb_d_vect_type) :: xv,bv, vtst,bvh
real(psb_dpk_), allocatable :: tst(:) real(psb_dpk_), allocatable :: tst(:), work(:)
real(psb_dpk_), allocatable :: xvc(:)[:], bvc(:)[:]
! blacs parameters ! blacs parameters
integer(psb_ipk_) :: ictxt, iam, np integer(psb_ipk_) :: ictxt, iam, np
type(psb_d_csre_sparse_mat) :: acsre ! type(psb_d_csre_sparse_mat) :: acsre
! solver parameters ! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr,nrl,ncl integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr,nrl,ncl, lwork, nclmx
integer(psb_ipk_) :: ip, img, nxch, p1,p2
integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes, ahnnz integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes, ahnnz
real(psb_dpk_) :: err, eps real(psb_dpk_) :: err, eps
integer(psb_ipk_), parameter :: times=50 integer(psb_ipk_) :: times
integer(psb_ipk_), parameter :: iwarm=10
! other variables ! other variables
integer(psb_ipk_) :: info, i integer(psb_ipk_) :: info, i
@ -91,7 +95,7 @@ program pdgenspmv
! !
! get parameters ! get parameters
! !
call get_parms(ictxt,afmt,idim) call get_parms(ictxt,afmt,idim, times)
! !
! allocate and fill in the coefficient matrix, rhs and initial guess ! allocate and fill in the coefficient matrix, rhs and initial guess
@ -108,52 +112,87 @@ program pdgenspmv
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
call psb_geasb(bvh,desc_a,info,scratch=.true.)
if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2
if (iam == psb_root_) write(psb_out_unit,'(" ")') if (iam == psb_root_) write(psb_out_unit,'(" ")')
!!$ write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'testmat-',idim,'-',np,'-',iam,'.mtx' !!$ write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'testmat-',idim,'-',np,'-',iam,'.mtx'
!!$ call a%print(fname,head="psb-testing") !!$ call a%print(fname,head="psb-testing")
call xv%set(done)
call psb_barrier(ictxt)
t1 = psb_wtime()
do i=1,times
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
end do
call psb_barrier(ictxt)
t2 = psb_wtime() - t1
call psb_amx(ictxt,t2)
!
! Coarrays for experiment
!
nrl = desc_a%get_local_rows() nrl = desc_a%get_local_rows()
ncl = desc_a%get_local_cols() ncl = desc_a%get_local_cols()
call a%csclip(ad,info,jmax=nrl) nclmx = ncl
call a%csclip(ah,info,jmin=nrl+1,jmax=ncl,cscale=.true.) call psb_amx(ictxt,nclmx)
call a%csclip(ah1,info,jmin=nrl+1,jmax=ncl,cscale=.true.) allocate(xvc(nclmx)[*])
call ah%cscnv(info,mold=acsre) allocate(bvc(nclmx)[*])
call ah1%cscnv(info,mold=acsre) xvc(1:ncl) = done *(/(i,i=1,ncl)/)
bvc(:) = dzero
call xv%set(xvc(1:ncl))
do i=1,iwarm
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
end do
! FIXME: cache flush needed here ! FIXME: cache flush needed here
call psb_barrier(ictxt) do i=1,iwarm
tt1 = psb_wtime() call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
do i=1,times
call psb_csmm(done,ah,xv,done,bv,info)
end do end do
call psb_barrier(ictxt) call psb_barrier(ictxt)
th = psb_wtime() - tt1
call psb_barrier(ictxt)
tt1 = psb_wtime() tt1 = psb_wtime()
do i=1,times do i=1,times
call psb_csmm(done,ah1,xv,done,bv,info) call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
end do end do
call psb_barrier(ictxt) call psb_barrier(ictxt)
th1 = psb_wtime() - tt1 th = psb_wtime() - tt1
call psb_amx(ictxt,th1) call psb_amx(ictxt,th)
if (.true.) then
associate(xchg => desc_a%halo_xch)
! FIXME: cache flush needed here
nxch = size(xchg%prcs_xch)
write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1
do i=1,iwarm
! Sync images
sync images(xchg%prcs_xch+1)
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
write(0,*) this_image(),'Boundaries ',p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2)
xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
end do
call a%csmv(done,xvc,dzero,bvc,info)
end do
call psb_barrier(ictxt)
tt1 = psb_wtime()
do i=1,times
! Sync images
sync images(xchg%prcs_xch+1)
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
!xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
end do
call a%csmv(done,xvc,dzero,bvc,info)
end do
call psb_barrier(ictxt)
tt2 = psb_wtime() - tt1
call psb_amx(ictxt,tt2)
end associate
call bvh%set(bvc(1:ncl))
endif
call psb_geaxpby(-done,bv,done,bvh,desc_a,info)
err = psb_genrm2(bvh,desc_a,info)
call psb_amx(ictxt,t2)
nr = desc_a%get_global_rows() nr = desc_a%get_global_rows()
annz = a%get_nzeros() annz = a%get_nzeros()
ahnnz = ah%get_nzeros()
amatsize = a%sizeof() amatsize = a%sizeof()
descsize = psb_sizeof(desc_a) descsize = psb_sizeof(desc_a)
call psb_sum(ictxt,annz) call psb_sum(ictxt,annz)
@ -163,39 +202,36 @@ program pdgenspmv
if (iam == psb_root_) then if (iam == psb_root_) then
flops = 2.d0*times*annz flops = 2.d0*times*annz
tflops = 2.d0*times*ahnnz tflops = flops
tflops1 = 2.d0*times*ahnnz
write(psb_out_unit,'("Matrix: ell1 ",i0)') idim write(psb_out_unit,'("Matrix: ell1 ",i0)') idim
write(psb_out_unit,'("Test on : ",i20," processors")') np write(psb_out_unit,'("Test on : ",i20," processors")') np
write(psb_out_unit,'("Size of matrix : ",i20," ")') nr write(psb_out_unit,'("Size of matrix : ",i20," ")') nr
write(psb_out_unit,'("Number of nonzeros : ",i20," ")') annz write(psb_out_unit,'("Number of nonzeros : ",i20," ")') annz
write(psb_out_unit,'("Number of nonzeros : ",i20," ")') ahnnz
write(psb_out_unit,'("Memory occupation : ",i20," ")') amatsize write(psb_out_unit,'("Memory occupation : ",i20," ")') amatsize
write(psb_out_unit,'("Number of flops (",i0," prod) : ",F20.0," ")') times,flops write(psb_out_unit,'("Number of flops (",i0," prod) : ",F20.0," ")') times,flops
flops = flops / (t2) flops = flops / (th)
tflops = tflops / (th) tflops = tflops / (tt2)
tflops1 = tflops1 / (th1)
write(psb_out_unit,'("Time for ",i0," products (s) : ",F20.3)')times, t2 write(psb_out_unit,'("Time for ",i0," products (s) : ",F20.3)')times, th
write(psb_out_unit,'("Time per product (ms) : ",F20.3)') t2*1.d3/(1.d0*times) write(psb_out_unit,'("Time per product (ms) : ",F20.3)') th*1.d3/(1.d0*times)
write(psb_out_unit,'("MFLOPS : ",F20.3)') flops/1.d6 write(psb_out_unit,'("MFLOPS : ",F20.3)') flops/1.d6
write(psb_out_unit,'("Time for ",i0," products (s) (trans.): ",F20.3)') times,th write(psb_out_unit,'("Time for ",i0," products (s) nw : ",F20.3)')times, tt2
write(psb_out_unit,'("Time per product (ms) (trans.): ",F20.3)') th*1.d3/(1.d0*times) write(psb_out_unit,'("Time per product (ms) nw : ",F20.3)') tt2*1.d3/(1.d0*times)
write(psb_out_unit,'("MFLOPS (trans.): ",F20.3)') tflops/1.d6 write(psb_out_unit,'("MFLOPS : ",F20.3)') tflops/1.d6
write(psb_out_unit,'("Time for ",i0," products (s) (trans.): ",F20.3)') times,th1
write(psb_out_unit,'("Time per product (ms) (trans.): ",F20.3)') th1*1.d3/(1.d0*times) write(psb_out_unit,'("Difference : ",E20.12)') err
write(psb_out_unit,'("MFLOPS (trans.): ",F20.3)') tflops1/1.d6
! !
! This computation is valid for CSR ! This computation is valid for CSR
! !
nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_int)+& nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_int)+&
& annz*(psb_sizeof_dp + psb_sizeof_int) & annz*(psb_sizeof_dp + psb_sizeof_int)
bdwdth = times*nbytes/(t2*1.d6) bdwdth = times*nbytes/(th*1.d6)
write(psb_out_unit,*) write(psb_out_unit,*)
write(psb_out_unit,'("MBYTES/S : ",F20.3)') bdwdth write(psb_out_unit,'("MBYTES/S : ",F20.3)') bdwdth
bdwdth = times*nbytes/(tt2*1.d6)
write(psb_out_unit,'("MBYTES/S (trans): ",F20.3)') bdwdth
write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%get_fmt() write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%get_fmt()
write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
@ -227,10 +263,10 @@ contains
! !
! get iteration parameters from standard input ! get iteration parameters from standard input
! !
subroutine get_parms(ictxt,afmt,idim) subroutine get_parms(ictxt,afmt,idim, times)
integer(psb_ipk_) :: ictxt integer(psb_ipk_) :: ictxt
character(len=*) :: afmt character(len=*) :: afmt
integer(psb_ipk_) :: idim integer(psb_ipk_) :: idim, times
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: intbuf(10), ip integer(psb_ipk_) :: intbuf(10), ip
@ -239,9 +275,11 @@ contains
if (iam == 0) then if (iam == 0) then
read(psb_inp_unit,*) afmt read(psb_inp_unit,*) afmt
read(psb_inp_unit,*) idim read(psb_inp_unit,*) idim
read(psb_inp_unit,*) times
endif endif
call psb_bcast(ictxt,afmt) call psb_bcast(ictxt,afmt)
call psb_bcast(ictxt,idim) call psb_bcast(ictxt,idim)
call psb_bcast(ictxt,times)
if (iam == 0) then if (iam == 0) then
write(psb_out_unit,'("Testing matrix : ell1")') write(psb_out_unit,'("Testing matrix : ell1")')

@ -1,3 +1,3 @@
CSR CSR
120 008
80

@ -236,6 +236,7 @@ program psb_d_pde2d
if (iam == psb_root_) then if (iam == psb_root_) then
write(psb_out_unit,'(" ")') write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processors : ",i0)') np
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2 write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
write(psb_out_unit,'("Number of iterations : ",i0)')iter write(psb_out_unit,'("Number of iterations : ",i0)')iter

@ -131,7 +131,7 @@ program psb_d_pde3d
! miscellaneous ! miscellaneous
real(psb_dpk_), parameter :: one = 1.d0 real(psb_dpk_), parameter :: one = 1.d0
real(psb_dpk_) :: t1, t2, tprec real(psb_dpk_) :: t1, t2, tprec, tslv, tprcavg, tslvavg
! sparse matrix and preconditioner ! sparse matrix and preconditioner
type(psb_dspmat_type) :: a type(psb_dspmat_type) :: a
@ -152,7 +152,8 @@ program psb_d_pde3d
integer(psb_ipk_) :: info, i integer(psb_ipk_) :: info, i
character(len=20) :: name,ch_err character(len=20) :: name,ch_err
character(len=40) :: fname character(len=40) :: fname
integer(psb_ipk_) :: itest, ntests
integer, parameter :: ntdef=10, nwrmup=4
info=psb_success_ info=psb_success_
@ -166,7 +167,7 @@ program psb_d_pde3d
endif endif
if(psb_get_errstatus() /= 0) goto 9999 if(psb_get_errstatus() /= 0) goto 9999
name='pde3d90' name='pde3d90'
call psb_set_errverbosity(itwo) call psb_set_errverbosity(itwo*500)
call psb_cd_set_large_threshold(itwo) call psb_cd_set_large_threshold(itwo)
! !
! Hello world ! Hello world
@ -178,7 +179,7 @@ program psb_d_pde3d
! !
! get parameters ! get parameters
! !
call get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst) call get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ntests)
! !
! allocate and fill in the coefficient matrix, rhs and initial guess ! allocate and fill in the coefficient matrix, rhs and initial guess
@ -201,45 +202,70 @@ program psb_d_pde3d
! prepare the preconditioner. ! prepare the preconditioner.
! !
if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype
call prec%init(ptype,info) tslv = 1d300
tprec = 1d300
tprcavg = 0
tslvavg = 0
do itest=1, ntests+nwrmup
call prec%init(ptype,info)
call psb_barrier(ictxt)
t1 = psb_wtime()
call prec%build(a,desc_a,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_precbld'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_barrier(ictxt) t2 = psb_wtime()-t1
t1 = psb_wtime() call psb_amx(ictxt,t2)
if (itest>nwrmup) then
tprec = min(tprec,t2)
tprcavg = tprcavg + t2
end if
call prec%free(info)
end do
if (iam == psb_root_) write(psb_out_unit,'(" ")')
call prec%init(ptype,info)
call prec%build(a,desc_a,info) call prec%build(a,desc_a,info)
if(info /= psb_success_) then call prec%descr()
info=psb_err_from_subroutine_
ch_err='psb_precbld'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
tprec = psb_wtime()-t1
call psb_amx(ictxt,tprec) do itest=1, ntests+nwrmup
!
! iterative method parameters
!
call xxv%zero()
if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd
call psb_barrier(ictxt)
t1 = psb_wtime()
eps = 1.d-9
call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
& itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='solver routine'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (iam == psb_root_) write(psb_out_unit,'("Preconditioner time : ",es12.5)')tprec call psb_barrier(ictxt)
if (iam == psb_root_) write(psb_out_unit,'(" ")') t2 = psb_wtime() - t1
call prec%descr() call psb_amx(ictxt,t2)
! if (itest>nwrmup) then
! iterative method parameters tslv = min(tslv,t2)
! tslvavg = tslvavg + t2
if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd end if
call psb_barrier(ictxt)
t1 = psb_wtime() end do
eps = 1.d-9
call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
& itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='solver routine'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_barrier(ictxt)
t2 = psb_wtime() - t1
call psb_amx(ictxt,t2)
amatsize = a%sizeof() amatsize = a%sizeof()
descsize = desc_a%sizeof() descsize = desc_a%sizeof()
precsize = prec%sizeof() precsize = prec%sizeof()
@ -249,8 +275,12 @@ program psb_d_pde3d
if (iam == psb_root_) then if (iam == psb_root_) then
write(psb_out_unit,'(" ")') write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2 write(psb_out_unit,'("Number of processors : ",i0)') np
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter write(psb_out_unit,'("Preconditioner time min : ",es12.5)')tprec
write(psb_out_unit,'("Preconditioner time avg : ",es12.5)')tprcavg
write(psb_out_unit,'("Time to solve system min : ",es12.5)')tslv
write(psb_out_unit,'("Time to solve system avg : ",es12.5)')tslvavg
write(psb_out_unit,'("Time per iteration avg : ",es12.5)')tslvavg/iter
write(psb_out_unit,'("Number of iterations : ",i0)')iter write(psb_out_unit,'("Number of iterations : ",i0)')iter
write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err
write(psb_out_unit,'("Info on exit : ",i0)')info write(psb_out_unit,'("Info on exit : ",i0)')info
@ -267,7 +297,6 @@ program psb_d_pde3d
call psb_gefree(bv,desc_a,info) call psb_gefree(bv,desc_a,info)
call psb_gefree(xxv,desc_a,info) call psb_gefree(xxv,desc_a,info)
call psb_spfree(a,desc_a,info) call psb_spfree(a,desc_a,info)
call prec%free(info)
call psb_cdfree(desc_a,info) call psb_cdfree(desc_a,info)
if(info /= psb_success_) then if(info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
@ -276,7 +305,7 @@ program psb_d_pde3d
goto 9999 goto 9999
end if end if
call psb_exit(ictxt) call psb_exit(ictxt,close=.false.)
stop stop
9999 call psb_error(ictxt) 9999 call psb_error(ictxt)
@ -287,10 +316,10 @@ contains
! !
! get iteration parameters from standard input ! get iteration parameters from standard input
! !
subroutine get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst) subroutine get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ntests)
integer(psb_ipk_) :: ictxt integer(psb_ipk_) :: ictxt
character(len=*) :: kmethd, ptype, afmt character(len=*) :: kmethd, ptype, afmt
integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst, ntests
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: ip integer(psb_ipk_) :: ip
@ -325,6 +354,11 @@ contains
else else
irst=1 irst=1
endif endif
if (ip >= 8) then
read(psb_inp_unit,*) ntests
else
ntests = ntdef
endif
! broadcast parameters to all processors ! broadcast parameters to all processors
@ -353,6 +387,7 @@ contains
call psb_bcast(ictxt,itmax) call psb_bcast(ictxt,itmax)
call psb_bcast(ictxt,itrace) call psb_bcast(ictxt,itrace)
call psb_bcast(ictxt,irst) call psb_bcast(ictxt,irst)
call psb_bcast(ictxt,ntests)
return return

@ -235,6 +235,7 @@ program psb_s_pde2d
if (iam == psb_root_) then if (iam == psb_root_) then
write(psb_out_unit,'(" ")') write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processors : ",i0)') np
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2 write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
write(psb_out_unit,'("Number of iterations : ",i0)')iter write(psb_out_unit,'("Number of iterations : ",i0)')iter

@ -249,6 +249,7 @@ program psb_s_pde3d
if (iam == psb_root_) then if (iam == psb_root_) then
write(psb_out_unit,'(" ")') write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processors : ",i0)') np
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2 write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
write(psb_out_unit,'("Number of iterations : ",i0)')iter write(psb_out_unit,'("Number of iterations : ",i0)')iter

@ -1,11 +1,11 @@
7 Number of entries below this 8 Number of entries below this
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES
BJAC Preconditioner NONE DIAG BJAC BJAC Preconditioner NONE DIAG BJAC
CSR Storage format for matrix A: CSR COO JAD CSR Storage format for matrix A: CSR COO JAD
1000 Domain size (acutal system is this**3) 200 Domain size (acutal system is this**3)
2 Stopping criterion 2 Stopping criterion
1000 MAXIT 1000 MAXIT
-1 ITRACE -1 ITRACE
002 IRST restart for RGMRES and BiCGSTABL 002 IRST restart for RGMRES and BiCGSTABL
10 NTESTS

Loading…
Cancel
Save