From 89235a0dea156a93e97e621de920b7013eb89e03 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 28 Jul 2017 15:39:02 +0100 Subject: [PATCH] Fixed dswapdata and sample program. --- base/comm/internals/psi_dswapdata.F90 | 4 +-- include/Make.inc.psblas | 46 +++++++++++---------------- test/kernel/pdgenspmv.f90 | 23 +++++++++++--- test/kernel/runs/spmv.inp | 2 +- 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90 index f357abd5..50890dd1 100644 --- a/base/comm/internals/psi_dswapdata.F90 +++ b/base/comm/internals/psi_dswapdata.F90 @@ -738,7 +738,7 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) logical, parameter :: do_events=.false. integer(psb_ipk_) :: ierr(5) character(len=20) :: name - integer, allocatable :: img_list(:) + integer, allocatable, save :: img_list(:) info=psb_success_ name='psi_xchg_vect' call psb_erractionsave(err_act) @@ -882,7 +882,7 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) end do else !versions with sync images - if (allocated(img_list)) deallocate(img_list) + if (allocated(img_list)) deallocate(img_list) allocate(img_list(nxch)) do ip= 1, nxch img = xchg%prcs_xch(ip) + 1 diff --git a/include/Make.inc.psblas b/include/Make.inc.psblas index bd9e87a3..237e017f 100644 --- a/include/Make.inc.psblas +++ b/include/Make.inc.psblas @@ -11,45 +11,41 @@ # The following ones are the variables used by the PSBLAS make scripts. -F90=caf -FC= caf -CC=mpicc -F90COPT=-O3 -FCOPT=-O3 -CCOPT=-O3 +FC=caf +CC=mpicc +FCOPT=-g -O3 +CCOPT=-g -O3 FMFLAG=-I FIFLAG=-I EXTRA_OPT= # These three should be always set! -MPF90=caf -MPF77=caf -MPCC=mpicc +MPFC=caf +MPCC=mpicc -F90LINK=$(MPF90) -FLINK=$(MPF77) +FLINK=$(MPFC) -LIBS= -L/home/users/pasqua/NUMERICAL/LIB/atlas/gnu491 +LIBS= # BLAS, BLACS and METIS libraries. -BLAS=-lcblas -lf77blas -latlas -METIS_LIB= +BLAS=-lcblas -lf77blas -latlas -L/opt/atlas/3.8.4/gnu/7.1.0/lib +METIS_LIB=-lmetis -L/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/Lib -L/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/lib AMD_LIB= LAPACK=-llapack EXTRA_COBJS= -PSBFDEFINES= -DHAVE_MOLD -DHAVE_EXTENDS_TYPE_OF -DHAVE_SAME_TYPE_AS -DHAVE_FINAL -DHAVE_ISO_FORTRAN_ENV -DHAVE_FLUSH_STMT -DHAVE_VOLATILE -DHAVE_ISO_C_BINDING -DHAVE_MOVE_ALLOC -DMPI_MOD -PSBCDEFINES=-DLowerUnderscore -DPtr64Bits +PSBFDEFINES=-DHAVE_METIS -DHAVE_LAPACK -DHAVE_MOLD -DHAVE_EXTENDS_TYPE_OF -DHAVE_SAME_TYPE_AS -DHAVE_FINAL -DHAVE_ISO_FORTRAN_ENV -DHAVE_FLUSH_STMT -DHAVE_VOLATILE -DMPI_MOD +PSBCDEFINES=-DHAVE_METIS_ -I/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/include -I/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/Include -DLowerUnderscore -DPtr64Bits AR=ar -cur RANLIB=ranlib INSTALL=/usr/bin/install -c INSTALL_DATA=${INSTALL} -m 644 -INSTALL_DIR=/home/users/pasqua/Ambra/LIB/PSBLAS -INSTALL_LIBDIR=/home/users/pasqua/Ambra/LIB/PSBLAS/lib -INSTALL_INCLUDEDIR=/home/users/pasqua/Ambra/LIB/PSBLAS/include -INSTALL_DOCSDIR=/home/users/pasqua/Ambra/LIB/PSBLAS/docs -INSTALL_SAMPLESDIR=/home/users/pasqua/Ambra/LIB/PSBLAS/samples +INSTALL_DIR=/opt/psblas/CAF/7.1.0-ext +INSTALL_LIBDIR=/opt/psblas/CAF/7.1.0-ext/lib +INSTALL_INCLUDEDIR=/opt/psblas/CAF/7.1.0-ext/include +INSTALL_DOCSDIR=/opt/psblas/CAF/7.1.0-ext/docs +INSTALL_SAMPLESDIR=/opt/psblas/CAF/7.1.0-ext/samples # the following is the flag for /bin/cp which shall copy the file only for updating (timestamp based)--on GNU Linux, '-u' CPUPDFLAG= @@ -75,12 +71,8 @@ FDEFINES=$(PSBFDEFINES) # These should be portable rules, arent they? .c.o: $(CC) $(CCOPT) $(CINCLUDES) $(CDEFINES) -c $< -o $@ -.f.o: - $(FC) $(FCOPT) $(FINCLUDES) -c $< -o $@ .f90.o: - $(F90) $(F90COPT) $(FINCLUDES) -c $< -o $@ -.F.o: - $(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@ + $(FC) $(FCOPT) $(FINCLUDES) -c $< -o $@ .F90.o: - $(F90) $(F90COPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@ + $(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@ diff --git a/test/kernel/pdgenspmv.f90 b/test/kernel/pdgenspmv.f90 index e0d7c312..07bddf51 100644 --- a/test/kernel/pdgenspmv.f90 +++ b/test/kernel/pdgenspmv.f90 @@ -33,6 +33,7 @@ ! program pdgenspmv use psb_base_mod + use psi_mod use psb_util_mod implicit none @@ -46,17 +47,18 @@ program pdgenspmv real(psb_dpk_) :: t1, t2, tprec, flops, tflops, tt1, tt2, bdwdth ! sparse matrix and preconditioner - type(psb_dspmat_type) :: a + type(psb_dspmat_type) :: a, ad, ah + type(psb_d_csr_sparse_mat) :: acsr ! descriptor type(psb_desc_type) :: desc_a ! dense matrices type(psb_d_vect_type) :: xv,bv, vtst - real(psb_dpk_), allocatable :: tst(:) + real(psb_dpk_), allocatable :: tst(:), work(:) ! blacs parameters integer(psb_ipk_) :: ictxt, iam, np ! solver parameters - integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr + integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr, nrl, ncl, lwork integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes real(psb_dpk_) :: err, eps integer(psb_ipk_), parameter :: times=10 @@ -109,6 +111,12 @@ program pdgenspmv end if if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 if (iam == psb_root_) write(psb_out_unit,'(" ")') + nrl = desc_a%get_local_rows() + ncl = desc_a%get_local_cols() + call a%csclip(ad,info,jmax=nrl) + call a%csclip(ah,info,jmin=nrl+1,jmax=ncl,cscale=.true.) + lwork = 2*ncl + allocate(work(lwork), stat=info) call xv%set(done) @@ -124,8 +132,13 @@ program pdgenspmv ! FIXME: cache flush needed here call psb_barrier(ictxt) tt1 = psb_wtime() - do i=1,times - call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'t') + do i=1,times + call psi_swapdata(psb_swap_send_,& + & dzero,xv%v,desc_a,work,info,data=psb_comm_halo_) + call psb_csmm(done,ad,xv,dzero,bv,info) + call psi_swapdata(psb_swap_recv_,& + & dzero,xv%v,desc_a,work,info,data=psb_comm_halo_) + call ah%a%csmv(done,xv%v%v(nrl+1:),done,bv%v%v,info) end do call psb_barrier(ictxt) tt2 = psb_wtime() - tt1 diff --git a/test/kernel/runs/spmv.inp b/test/kernel/runs/spmv.inp index 91844ce2..72025415 100644 --- a/test/kernel/runs/spmv.inp +++ b/test/kernel/runs/spmv.inp @@ -1,3 +1,3 @@ CSR -50 +80