diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile index fdd4f588..5d938973 100755 --- a/test/cudakern/Makefile +++ b/test/cudakern/Makefile @@ -24,9 +24,12 @@ DPGOBJS=dpdegenmv.o SPGOBJS=spdegenmv.o EXEDIR=./runs -all: pgen file +all: dir pgen file pgen: dpdegenmv spdegenmv file: s_file_spmv c_file_spmv d_file_spmv z_file_spmv +dpdegenmv spdegenmv s_file_spmv c_file_spmv d_file_spmv z_file_spmv: dir +dir: + (if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi) dpdegenmv: $(DPGOBJS) $(FLINK) $(LOPT) $(DPGOBJS) -fopenmp -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index db845d71..85059e81 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -70,6 +70,16 @@ contains ! ! functions parametrizing the differential equation ! + + ! + ! Note: b1, b2 and b3 are the coefficients of the first + ! derivative of the unknown function. The default + ! we apply here is to have them zero, so that the resulting + ! matrix is symmetric/hermitian and suitable for + ! testing with CG and FCG. + ! When testing methods for non-hermitian matrices you can + ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) + ! function b1(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero implicit none @@ -138,7 +148,7 @@ contains ! the rhs. ! subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& - & f,amold,vmold,imold,partition,nrl,iv) + & f,amold,vmold,imold,partition,nrl,iv,tnd) use psb_base_mod use psb_util_mod ! @@ -169,7 +179,7 @@ contains class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold integer(psb_ipk_), optional :: partition, nrl,iv(:) - + logical, optional :: tnd ! Local variables. integer(psb_ipk_), parameter :: nb=20 @@ -198,6 +208,7 @@ contains real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb integer(psb_ipk_) :: err_act procedure(d_func_3d), pointer :: f_ + logical :: tnd_ character(len=20) :: name, ch_err,tmpfmt info = psb_success_ @@ -492,9 +503,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) else - call psb_spasb(a,desc_a,info,afmt=afmt) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) end if end if call psb_barrier(ctxt) @@ -559,7 +570,7 @@ program pdgenmv ! input parameters character(len=5) :: acfmt, agfmt integer :: idim - + logical :: tnd ! miscellaneous real(psb_dpk_), parameter :: one = 1.d0 real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& @@ -646,14 +657,14 @@ program pdgenmv ! ! get parameters ! - call get_parms(ctxt,acfmt,agfmt,idim) - + call get_parms(ctxt,acfmt,agfmt,idim,tnd) + call psb_init_timers() ! ! allocate and fill in the coefficient matrix and initial vectors ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3) + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3,tnd=tnd) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -935,6 +946,7 @@ program pdgenmv write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize end if + call psb_print_timers(ctxt) ! ! cleanup storage and exit @@ -962,10 +974,11 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,acfmt,agfmt,idim) + subroutine get_parms(ctxt,acfmt,agfmt,idim,tnd) type(psb_ctxt_type) :: ctxt character(len=*) :: agfmt, acfmt integer :: idim + logical :: tnd integer :: np, iam integer :: intbuf(10), ip @@ -978,17 +991,22 @@ contains read(psb_inp_unit,*) agfmt write(*,*) 'Size of discretization cube?' read(psb_inp_unit,*) idim + write(*,*) 'Try comm/comp overlap?' + read(psb_inp_unit,*) tnd endif call psb_bcast(ctxt,acfmt) call psb_bcast(ctxt,agfmt) call psb_bcast(ctxt,idim) - + call psb_bcast(ctxt,tnd) + if (iam == 0) then write(psb_out_unit,'("Testing matrix : ell1")') write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim write(psb_out_unit,'("Number of processors : ",i0)')np write(psb_out_unit,'("Data distribution : BLOCK")') write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Storage formats ",a)') acfmt,' ',agfmt + write(psb_out_unit,'("Testing overlap ND ",l8)') tnd end if return