Experiment with remote indirect access.

psblas-caf-xp
sfilippone 9 years ago
parent 5295c552ff
commit 3adc9f4775

@ -72,12 +72,12 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
! ....local scalars....
integer(psb_ipk_) :: np, me, img
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: nxch, nsnd, nrcv, nesd,nerv, ip, j, k, ixch
integer(psb_ipk_) :: nxch, nsnd, nrcv, nesd,nerv, ip, j, k, ixch,mxnrcv
! ...parameters
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: debug=.false.
character(len=20) :: name
integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:]
integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:], buf_rmt_idx(:)[:]
type(event_type), allocatable, save :: snd_done(:)[:]
type(event_type), save :: rcv_done[*]
name='psi_cnv_v2xch'
@ -100,6 +100,7 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
call psb_get_xch_idx(vidx_in, nxch, nsnd, nrcv)
xch_idx%max_buffer_size = max(nsnd,nrcv)
call psb_amx(ictxt,xch_idx%max_buffer_size)
mxnrcv = xch_idx%max_buffer_size
if (info == 0) call psb_realloc(nxch,xch_idx%prcs_xch,info)
if (info == 0) call psb_realloc(nxch,2,xch_idx%rmt_snd_bnd,info)
if (info == 0) call psb_realloc(nxch,2,xch_idx%rmt_rcv_bnd,info)
@ -107,6 +108,7 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
if (info == 0) call psb_realloc(nxch+1,xch_idx%loc_rcv_bnd,info)
if (info == 0) call psb_realloc(nsnd,xch_idx%loc_snd_idx,info)
if (info == 0) call psb_realloc(nrcv,xch_idx%loc_rcv_idx,info)
if (info == 0) call psb_realloc(nrcv,xch_idx%rmt_rcv_idx,info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
@ -119,10 +121,12 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
ixch = 1
xch_idx%loc_snd_bnd(1) = 1
xch_idx%loc_rcv_bnd(1) = 1
if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd)
if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd)
if (allocated(snd_done)) deallocate(snd_done)
allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*])
if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd)
if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd)
if (allocated(snd_done)) deallocate(snd_done)
allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*])
allocate(buf_rmt_idx(mxnrcv)[*])
do
if (ip > size(vidx_in)) then
write(psb_err_unit,*) trim(name),': Warning: out of size of input vector '
@ -142,21 +146,24 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
xch_idx%loc_snd_bnd(ixch+1) = xch_idx%loc_snd_bnd(ixch) + nesd
img = xch_idx%prcs_xch(ixch) + 1
!Here I am assuming that all the data exchange between two images takes place in one exchange
buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1)
buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1)
event post(snd_done(me)[img])
event wait(snd_done(img))
xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2)
xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2)
buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1)
buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1)
buf_rmt_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1)
event post(snd_done(me)[img])
event wait(snd_done(img))
xch_idx%rmt_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = &
& buf_rmt_idx(1:nerv)
xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2)
xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2)
ip = ip+nerv+nesd+3
ixch = ixch + 1
end do
xch_idx%rmt_rcv_bnd(:,2) = xch_idx%rmt_rcv_bnd(:,2) - 1
xch_idx%rmt_snd_bnd(:,2) = xch_idx%rmt_snd_bnd(:,2) - 1
if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd)
if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd)
if (allocated(snd_done)) deallocate(snd_done)
sync all
if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd)
if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd)
if (allocated(snd_done)) deallocate(snd_done)
!sync all
call psb_erractionrestore(err_act)
return

@ -163,7 +163,7 @@ subroutine psi_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,&
i=1
if (debug_level >= psb_debug_inner_)&
& write(debug_unit,*) me,' ',trim(name),': start ',info
!!$ write(0,*) 'extract_dep_list ',me,npr
pointer_dep_list=1
if (is_bld) then
do while (desc_str(i) /= -1)
@ -272,7 +272,12 @@ subroutine psi_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,&
itmp(1:dl_lda) = dep_list(1:dl_lda,me)
dl_mpi = dl_lda
call caf_allgather(itmp,dl_mpi, dep_list, minfo)
if (.false.) then
call caf_allgather(itmp, dl_mpi, dep_list, minfo)
else
call mpi_allgather(itmp,dl_mpi,psb_mpi_ipk_integer,&
& dep_list,dl_mpi,psb_mpi_ipk_integer,icomm,minfo)
end if
if (info == 0) deallocate(itmp,stat=info)
if (info /= psb_success_) then
info=psb_err_alloc_dealloc_

@ -155,7 +155,7 @@ psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o ps
psb_base_mod.o: $(MODULES)
psb_caf_mod.o: psb_caf_mod.f90 psb_const_mod.o
psb_caf_mod.o: psb_caf_mod.f90 psb_const_mod.o psb_realloc_mod.o
$(FC) $(FINCLUDES) $(FDEFINES) $(F90COPT) $(EXTRA_OPT) -c $< -o $@
psi_penv_mod.o: psi_penv_mod.F90 $(BASIC_MODS)

@ -204,6 +204,7 @@ module psb_desc_mod
integer(psb_ipk_), allocatable :: rmt_rcv_bnd(:,:)
integer(psb_ipk_), allocatable :: loc_rcv_bnd(:)
integer(psb_ipk_), allocatable :: loc_snd_bnd(:)
integer(psb_ipk_), allocatable :: rmt_rcv_idx(:)
integer(psb_ipk_), allocatable :: loc_rcv_idx(:)
integer(psb_ipk_), allocatable :: loc_snd_idx(:)
integer(psb_ipk_) :: max_buffer_size=0

@ -1131,6 +1131,7 @@ t2 = mpi_wtime() - t1
end subroutine caf_zgatherv
subroutine caf_iallgatherv(snd, scount, rcv, rcount, rdispls, info)
use psb_realloc_mod
implicit none
integer(psb_ipk_), intent(in) :: scount, snd(:), rcount(:), rdispls(:)
integer(psb_ipk_), allocatable, intent(inout) :: rcv(:)
@ -1149,16 +1150,22 @@ t2 = mpi_wtime() - t1
info = -4
return
endif
! write(0,*) 'caf_allgatherv',me,np
! call flush(0)
! sync all
if (allocated(snd_buf)) deallocate(snd_buf)
allocate(snd_buf(scount)[*], STAT=info)
if (info/=0) return
if (info/=0)then
write(0,*) me, 'Info on allocate ',info
return
end if
if (allocated(rcv).and.(size(rcv,1) < rdispls(np) + rcount(np))) then
call move_alloc(rcv,rcv_tmp)
allocate(rcv(rdispls(np)+rcount(np)))
rcv(1:size(rcv_tmp,1))=rcv_tmp
deallocate(rcv_tmp)
call psb_realloc(rdispls(np)+rcount(np),rcv,info)
!!$ call move_alloc(rcv,rcv_tmp)
!!$ allocate(rcv(rdispls(np)+rcount(np)))
!!$ rcv(1:size(rcv_tmp,1))=rcv_tmp
!!$ deallocate(rcv_tmp)
endif
snd_buf(1:scount)=snd(1:scount)
@ -1365,39 +1372,75 @@ t2 = mpi_wtime() - t1
integer(psb_ipk_), allocatable :: snd_buf(:)[:]
type(event_type), allocatable :: snd_copied(:)[:]
double precision :: t1, t2
t1 = mpi_wtime()
np = num_images()
me = this_image()
info = 0
if (size(rcv,1) < scount) then
info = -3
print*,'error', info, size(rcv,1), scount
return
endif
if (size(rcv,2) < np) then
info = -4
print*,'error', info, size(rcv,2), np
return
endif
if (allocated(snd_buf)) deallocate(snd_buf)
if (allocated(snd_copied)) deallocate(snd_copied)
allocate(snd_buf(size(snd,1))[*])
allocate(snd_copied(np)[*])
!allocate(snd_buf(size(snd,1))[*])
snd_buf=snd
do img=1,np
event post(snd_copied(me)[img])
enddo
!sync all
do img=1,np
event wait(snd_copied(img))
rcv(:,img)=snd_buf(:)[img]
enddo
if (allocated(snd_buf)) deallocate(snd_buf)
if (allocated(snd_copied)) deallocate(snd_copied)
!Not sure this is necessary...
sync all
t2 = mpi_wtime() - t1
t1 = mpi_wtime()
np = num_images()
me = this_image()
info = 0
if (size(rcv,1) < scount) then
info = -3
print*,'error', info, size(rcv,1), scount
return
endif
if (size(rcv,2) < np) then
info = -4
print*,'error', info, size(rcv,2), np
return
endif
! write(*,*) 'Hello from ',me,' of:', np,size(snd,1)
! sync all
if (.true.) then
!!$ if (allocated(snd_buf)) deallocate(snd_buf)
!!$ if (allocated(snd_copied)) deallocate(snd_copied)
allocate(snd_buf(size(snd,1))[*],stat=info)
if (info /= 0) then
write(*,*) 'Error on allocating snd_buf ',me
stop
end if
allocate(snd_copied(np)[*],stat=info)
if (info /= 0) then
write(*,*) 'Error on allocating snd_copied ',me
stop
end if
!allocate(snd_buf(size(snd,1))[*])
snd_buf(:)=snd(:)
! write(*,*) 'Sending from ',me,':', snd(:)
do img=1,np
event post(snd_copied(me)[img])
enddo
!sync all
if(.false.) then
do img=1,np
event wait(snd_copied(img))
rcv(:,img)=snd_buf(:)[img]
enddo
else
do img=me+1,np
event wait(snd_copied(img))
rcv(:,img)=snd_buf(:)[img]
enddo
do img=1,me
event wait(snd_copied(img))
rcv(:,img)=snd_buf(:)[img]
enddo
end if
!if (allocated(snd_buf))
deallocate(snd_buf)
!if (allocated(snd_copied))
deallocate(snd_copied)
!Not sure this is necessary...
!sync all
else if(.false.) then
do img=1,np
if (me == img) rcv(:,img) = snd(:)
call co_broadcast(rcv(:,img),img)
end do
else if(.false.) then
rcv(:,:) = 0
rcv(:,me) = snd(:)
call co_sum(rcv)
end if
t2 = mpi_wtime() - t1
end subroutine caf_allgather

@ -28,24 +28,24 @@ FLINK=$(MPFC)
LIBS=
# BLAS, BLACS and METIS libraries.
BLAS=-lcblas -lf77blas -latlas -L/opt/atlas/3.8.4/gnu/7.1.0/lib
METIS_LIB=-lmetis -L/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/Lib -L/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/lib
BLAS=-lopenblas -L/mnt/gpfs0/home/e802756/builds/openblas/0.2.20
METIS_LIB=
AMD_LIB=
LAPACK=-llapack
LAPACK=
EXTRA_COBJS=
PSBFDEFINES=-DHAVE_METIS -DHAVE_LAPACK -DHAVE_MOLD -DHAVE_EXTENDS_TYPE_OF -DHAVE_SAME_TYPE_AS -DHAVE_FINAL -DHAVE_ISO_FORTRAN_ENV -DHAVE_FLUSH_STMT -DHAVE_VOLATILE -DMPI_MOD
PSBCDEFINES=-DHAVE_METIS_ -I/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/include -I/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/Include -DLowerUnderscore -DPtr64Bits
PSBFDEFINES=-DHAVE_LAPACK -DHAVE_MOLD -DHAVE_EXTENDS_TYPE_OF -DHAVE_SAME_TYPE_AS -DHAVE_FINAL -DHAVE_ISO_FORTRAN_ENV -DHAVE_FLUSH_STMT -DHAVE_VOLATILE -DMPI_MOD
PSBCDEFINES=-DLowerUnderscore -DPtr64Bits
AR=ar -cur
RANLIB=ranlib
INSTALL=/usr/bin/install -c
INSTALL_DATA=${INSTALL} -m 644
INSTALL_DIR=/opt/psblas/CAF/7.1.0-csr
INSTALL_LIBDIR=/opt/psblas/CAF/7.1.0-csr/lib
INSTALL_INCLUDEDIR=/opt/psblas/CAF/7.1.0-csr/include
INSTALL_DOCSDIR=/opt/psblas/CAF/7.1.0-csr/docs
INSTALL_SAMPLESDIR=/opt/psblas/CAF/7.1.0-csr/samples
INSTALL_DIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107
INSTALL_LIBDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/lib
INSTALL_INCLUDEDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/include
INSTALL_DOCSDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/docs
INSTALL_SAMPLESDIR=/mnt/gpfs0/home/e802756/NUMERICAL/CAF/7.1.0-ompi1107/samples
# the following is the flag for /bin/cp which shall copy the file only for updating (timestamp based)--on GNU Linux, '-u'
CPUPDFLAG=

@ -28,8 +28,7 @@ pingpong: pingpong.o
clean:
/bin/rm -f hello.o pingpong.o
$(EXEDIR)/hello
/bin/rm -f hello.o pingpong.o $(EXEDIR)/hello
verycleanlib:
(cd ../..; make veryclean)
lib:

@ -1,21 +1,22 @@
program hello
use psb_base_mod
use iso_fortran_env
implicit none
integer iam, np, icontxt, ip, jp, idummy
integer me, np, icontxt, ip, jp, idummy
integer :: snd_buf(4)[*]
type(event_type), allocatable :: snd_copied(:)[:]
call psb_init(icontxt)
call psb_info(icontxt,iam,np)
! have all processes check in
if ((iam >= 0).and.(iam < np)) then
if (iam == 0) then
do ip = 1, np-1
call psb_rcv(icontxt,idummy,ip)
enddo
write(*,*) 'Hello, world: all ',np, &
& ' processes checked in!'
else
call psb_snd(icontxt,idummy,0)
endif
me = this_image()
np = num_images()
write(*,*) 'Hello from ',me,' of:', np
snd_buf(1:4) = me*(/1,2,3,4/)
sync all
if (me == 1) then
do ip=1,np
write(*,*) 'From ',ip,' :',snd_buf(:)[ip]
end do
end if
call psb_exit(icontxt)
end program hello

@ -47,20 +47,24 @@ program pdgenspmv
! sparse matrix and preconditioner
type(psb_dspmat_type) :: a, ad, ah, ah1
type(psb_d_csr_sparse_mat) :: acsr
! descriptor
type(psb_desc_type) :: desc_a
! dense matrices
type(psb_d_vect_type) :: xv,bv, vtst
real(psb_dpk_), allocatable :: tst(:)
type(psb_d_vect_type) :: xv,bv, vtst,bvh
real(psb_dpk_), allocatable :: tst(:), work(:)
real(psb_dpk_), allocatable :: xvc(:)[:], bvc(:)[:]
! blacs parameters
integer(psb_ipk_) :: ictxt, iam, np
type(psb_d_csre_sparse_mat) :: acsre
! type(psb_d_csre_sparse_mat) :: acsre
! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr,nrl,ncl
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr,nrl,ncl, lwork, nclmx
integer(psb_ipk_) :: ip, img, nxch, p1,p2
integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes, ahnnz
real(psb_dpk_) :: err, eps
integer(psb_ipk_), parameter :: times=50
integer(psb_ipk_) :: times
integer(psb_ipk_), parameter :: iwarm=10
! other variables
integer(psb_ipk_) :: info, i
@ -91,7 +95,7 @@ program pdgenspmv
!
! get parameters
!
call get_parms(ictxt,afmt,idim)
call get_parms(ictxt,afmt,idim, times)
!
! allocate and fill in the coefficient matrix, rhs and initial guess
@ -108,52 +112,87 @@ program pdgenspmv
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_geasb(bvh,desc_a,info,scratch=.true.)
if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2
if (iam == psb_root_) write(psb_out_unit,'(" ")')
!!$ write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'testmat-',idim,'-',np,'-',iam,'.mtx'
!!$ call a%print(fname,head="psb-testing")
call xv%set(done)
call psb_barrier(ictxt)
t1 = psb_wtime()
do i=1,times
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
end do
call psb_barrier(ictxt)
t2 = psb_wtime() - t1
call psb_amx(ictxt,t2)
!
! Coarrays for experiment
!
nrl = desc_a%get_local_rows()
ncl = desc_a%get_local_cols()
call a%csclip(ad,info,jmax=nrl)
call a%csclip(ah,info,jmin=nrl+1,jmax=ncl,cscale=.true.)
call a%csclip(ah1,info,jmin=nrl+1,jmax=ncl,cscale=.true.)
call ah%cscnv(info,mold=acsre)
call ah1%cscnv(info,mold=acsre)
nclmx = ncl
call psb_amx(ictxt,nclmx)
allocate(xvc(nclmx)[*])
allocate(bvc(nclmx)[*])
xvc(1:ncl) = done *(/(i,i=1,ncl)/)
bvc(:) = dzero
call xv%set(xvc(1:ncl))
do i=1,iwarm
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
end do
! FIXME: cache flush needed here
call psb_barrier(ictxt)
tt1 = psb_wtime()
do i=1,times
call psb_csmm(done,ah,xv,done,bv,info)
do i=1,iwarm
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
end do
call psb_barrier(ictxt)
th = psb_wtime() - tt1
call psb_barrier(ictxt)
tt1 = psb_wtime()
do i=1,times
call psb_csmm(done,ah1,xv,done,bv,info)
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'n')
end do
call psb_barrier(ictxt)
th1 = psb_wtime() - tt1
call psb_amx(ictxt,th1)
th = psb_wtime() - tt1
call psb_amx(ictxt,th)
if (.true.) then
associate(xchg => desc_a%halo_xch)
! FIXME: cache flush needed here
nxch = size(xchg%prcs_xch)
write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1
do i=1,iwarm
! Sync images
sync images(xchg%prcs_xch+1)
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
write(0,*) this_image(),'Boundaries ',p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2)
xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
end do
call a%csmv(done,xvc,dzero,bvc,info)
end do
call psb_barrier(ictxt)
tt1 = psb_wtime()
do i=1,times
! Sync images
sync images(xchg%prcs_xch+1)
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
!xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
end do
call a%csmv(done,xvc,dzero,bvc,info)
end do
call psb_barrier(ictxt)
tt2 = psb_wtime() - tt1
call psb_amx(ictxt,tt2)
end associate
call bvh%set(bvc(1:ncl))
endif
call psb_geaxpby(-done,bv,done,bvh,desc_a,info)
err = psb_genrm2(bvh,desc_a,info)
call psb_amx(ictxt,t2)
nr = desc_a%get_global_rows()
annz = a%get_nzeros()
ahnnz = ah%get_nzeros()
amatsize = a%sizeof()
descsize = psb_sizeof(desc_a)
call psb_sum(ictxt,annz)
@ -163,39 +202,36 @@ program pdgenspmv
if (iam == psb_root_) then
flops = 2.d0*times*annz
tflops = 2.d0*times*ahnnz
tflops1 = 2.d0*times*ahnnz
tflops = flops
write(psb_out_unit,'("Matrix: ell1 ",i0)') idim
write(psb_out_unit,'("Test on : ",i20," processors")') np
write(psb_out_unit,'("Size of matrix : ",i20," ")') nr
write(psb_out_unit,'("Number of nonzeros : ",i20," ")') annz
write(psb_out_unit,'("Number of nonzeros : ",i20," ")') ahnnz
write(psb_out_unit,'("Memory occupation : ",i20," ")') amatsize
write(psb_out_unit,'("Number of flops (",i0," prod) : ",F20.0," ")') times,flops
flops = flops / (t2)
tflops = tflops / (th)
tflops1 = tflops1 / (th1)
write(psb_out_unit,'("Time for ",i0," products (s) : ",F20.3)')times, t2
write(psb_out_unit,'("Time per product (ms) : ",F20.3)') t2*1.d3/(1.d0*times)
flops = flops / (th)
tflops = tflops / (tt2)
write(psb_out_unit,'("Time for ",i0," products (s) : ",F20.3)')times, th
write(psb_out_unit,'("Time per product (ms) : ",F20.3)') th*1.d3/(1.d0*times)
write(psb_out_unit,'("MFLOPS : ",F20.3)') flops/1.d6
write(psb_out_unit,'("Time for ",i0," products (s) (trans.): ",F20.3)') times,th
write(psb_out_unit,'("Time per product (ms) (trans.): ",F20.3)') th*1.d3/(1.d0*times)
write(psb_out_unit,'("MFLOPS (trans.): ",F20.3)') tflops/1.d6
write(psb_out_unit,'("Time for ",i0," products (s) (trans.): ",F20.3)') times,th1
write(psb_out_unit,'("Time per product (ms) (trans.): ",F20.3)') th1*1.d3/(1.d0*times)
write(psb_out_unit,'("MFLOPS (trans.): ",F20.3)') tflops1/1.d6
write(psb_out_unit,'("Time for ",i0," products (s) nw : ",F20.3)')times, tt2
write(psb_out_unit,'("Time per product (ms) nw : ",F20.3)') tt2*1.d3/(1.d0*times)
write(psb_out_unit,'("MFLOPS : ",F20.3)') tflops/1.d6
write(psb_out_unit,'("Difference : ",E20.12)') err
!
! This computation is valid for CSR
!
nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_int)+&
& annz*(psb_sizeof_dp + psb_sizeof_int)
bdwdth = times*nbytes/(t2*1.d6)
bdwdth = times*nbytes/(th*1.d6)
write(psb_out_unit,*)
write(psb_out_unit,'("MBYTES/S : ",F20.3)') bdwdth
bdwdth = times*nbytes/(tt2*1.d6)
write(psb_out_unit,'("MBYTES/S (trans): ",F20.3)') bdwdth
write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%get_fmt()
write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
@ -227,10 +263,10 @@ contains
!
! get iteration parameters from standard input
!
subroutine get_parms(ictxt,afmt,idim)
subroutine get_parms(ictxt,afmt,idim, times)
integer(psb_ipk_) :: ictxt
character(len=*) :: afmt
integer(psb_ipk_) :: idim
integer(psb_ipk_) :: idim, times
integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: intbuf(10), ip
@ -239,9 +275,11 @@ contains
if (iam == 0) then
read(psb_inp_unit,*) afmt
read(psb_inp_unit,*) idim
read(psb_inp_unit,*) times
endif
call psb_bcast(ictxt,afmt)
call psb_bcast(ictxt,idim)
call psb_bcast(ictxt,times)
if (iam == 0) then
write(psb_out_unit,'("Testing matrix : ell1")')

@ -1,3 +1,3 @@
CSR
120
008
80

@ -236,6 +236,7 @@ program psb_d_pde2d
if (iam == psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processors : ",i0)') np
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
write(psb_out_unit,'("Number of iterations : ",i0)')iter

@ -131,7 +131,7 @@ program psb_d_pde3d
! miscellaneous
real(psb_dpk_), parameter :: one = 1.d0
real(psb_dpk_) :: t1, t2, tprec
real(psb_dpk_) :: t1, t2, tprec, tslv, tprcavg, tslvavg
! sparse matrix and preconditioner
type(psb_dspmat_type) :: a
@ -152,7 +152,8 @@ program psb_d_pde3d
integer(psb_ipk_) :: info, i
character(len=20) :: name,ch_err
character(len=40) :: fname
integer(psb_ipk_) :: itest, ntests
integer, parameter :: ntdef=10, nwrmup=4
info=psb_success_
@ -166,7 +167,7 @@ program psb_d_pde3d
endif
if(psb_get_errstatus() /= 0) goto 9999
name='pde3d90'
call psb_set_errverbosity(itwo)
call psb_set_errverbosity(itwo*500)
call psb_cd_set_large_threshold(itwo)
!
! Hello world
@ -178,7 +179,7 @@ program psb_d_pde3d
!
! get parameters
!
call get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst)
call get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ntests)
!
! allocate and fill in the coefficient matrix, rhs and initial guess
@ -201,45 +202,70 @@ program psb_d_pde3d
! prepare the preconditioner.
!
if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype
call prec%init(ptype,info)
tslv = 1d300
tprec = 1d300
tprcavg = 0
tslvavg = 0
do itest=1, ntests+nwrmup
call prec%init(ptype,info)
call psb_barrier(ictxt)
t1 = psb_wtime()
call prec%build(a,desc_a,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_precbld'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_barrier(ictxt)
t1 = psb_wtime()
t2 = psb_wtime()-t1
call psb_amx(ictxt,t2)
if (itest>nwrmup) then
tprec = min(tprec,t2)
tprcavg = tprcavg + t2
end if
call prec%free(info)
end do
if (iam == psb_root_) write(psb_out_unit,'(" ")')
call prec%init(ptype,info)
call prec%build(a,desc_a,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_precbld'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call prec%descr()
tprec = psb_wtime()-t1
call psb_amx(ictxt,tprec)
do itest=1, ntests+nwrmup
!
! iterative method parameters
!
call xxv%zero()
if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd
call psb_barrier(ictxt)
t1 = psb_wtime()
eps = 1.d-9
call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
& itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='solver routine'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (iam == psb_root_) write(psb_out_unit,'("Preconditioner time : ",es12.5)')tprec
if (iam == psb_root_) write(psb_out_unit,'(" ")')
call prec%descr()
!
! iterative method parameters
!
if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd
call psb_barrier(ictxt)
t1 = psb_wtime()
eps = 1.d-9
call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
& itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
call psb_barrier(ictxt)
t2 = psb_wtime() - t1
call psb_amx(ictxt,t2)
if (itest>nwrmup) then
tslv = min(tslv,t2)
tslvavg = tslvavg + t2
end if
end do
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='solver routine'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_barrier(ictxt)
t2 = psb_wtime() - t1
call psb_amx(ictxt,t2)
amatsize = a%sizeof()
descsize = desc_a%sizeof()
precsize = prec%sizeof()
@ -249,8 +275,12 @@ program psb_d_pde3d
if (iam == psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
write(psb_out_unit,'("Number of processors : ",i0)') np
write(psb_out_unit,'("Preconditioner time min : ",es12.5)')tprec
write(psb_out_unit,'("Preconditioner time avg : ",es12.5)')tprcavg
write(psb_out_unit,'("Time to solve system min : ",es12.5)')tslv
write(psb_out_unit,'("Time to solve system avg : ",es12.5)')tslvavg
write(psb_out_unit,'("Time per iteration avg : ",es12.5)')tslvavg/iter
write(psb_out_unit,'("Number of iterations : ",i0)')iter
write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err
write(psb_out_unit,'("Info on exit : ",i0)')info
@ -267,7 +297,6 @@ program psb_d_pde3d
call psb_gefree(bv,desc_a,info)
call psb_gefree(xxv,desc_a,info)
call psb_spfree(a,desc_a,info)
call prec%free(info)
call psb_cdfree(desc_a,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
@ -276,7 +305,7 @@ program psb_d_pde3d
goto 9999
end if
call psb_exit(ictxt)
call psb_exit(ictxt,close=.false.)
stop
9999 call psb_error(ictxt)
@ -287,10 +316,10 @@ contains
!
! get iteration parameters from standard input
!
subroutine get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst)
subroutine get_parms(ictxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ntests)
integer(psb_ipk_) :: ictxt
character(len=*) :: kmethd, ptype, afmt
integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst
integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst, ntests
integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: ip
@ -325,6 +354,11 @@ contains
else
irst=1
endif
if (ip >= 8) then
read(psb_inp_unit,*) ntests
else
ntests = ntdef
endif
! broadcast parameters to all processors
@ -353,6 +387,7 @@ contains
call psb_bcast(ictxt,itmax)
call psb_bcast(ictxt,itrace)
call psb_bcast(ictxt,irst)
call psb_bcast(ictxt,ntests)
return

@ -235,6 +235,7 @@ program psb_s_pde2d
if (iam == psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processors : ",i0)') np
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
write(psb_out_unit,'("Number of iterations : ",i0)')iter

@ -249,6 +249,7 @@ program psb_s_pde3d
if (iam == psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processors : ",i0)') np
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
write(psb_out_unit,'("Number of iterations : ",i0)')iter

@ -1,11 +1,11 @@
7 Number of entries below this
8 Number of entries below this
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES
BJAC Preconditioner NONE DIAG BJAC
CSR Storage format for matrix A: CSR COO JAD
1000 Domain size (acutal system is this**3)
200 Domain size (acutal system is this**3)
2 Stopping criterion
1000 MAXIT
-1 ITRACE
002 IRST restart for RGMRES and BiCGSTABL
10 NTESTS

Loading…
Cancel
Save