Fix HAVE_CUDA in test programs: guard the CUDA-specific code with `#ifdef HAVE_CUDA` instead of `#ifdef HAVE_GPU`

repack-nvid
sfilippone 11 months ago
parent b2b7b074df
commit 41491f7b9c

@ -547,7 +547,7 @@ program pdgenmv
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
use psb_ext_mod use psb_ext_mod
#ifdef HAVE_GPU #ifdef HAVE_CUDA
use psb_cuda_mod use psb_cuda_mod
#endif #endif
#ifdef HAVE_RSB #ifdef HAVE_RSB
@ -572,7 +572,7 @@ program pdgenmv
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
! dense matrices ! dense matrices
type(psb_d_vect_type), target :: xv, bv, xg, bg type(psb_d_vect_type), target :: xv, bv, xg, bg
#ifdef HAVE_GPU #ifdef HAVE_CUDA
type(psb_d_vect_cuda) :: vmold type(psb_d_vect_cuda) :: vmold
type(psb_i_vect_cuda) :: imold type(psb_i_vect_cuda) :: imold
#endif #endif
@ -594,7 +594,7 @@ program pdgenmv
#ifdef HAVE_RSB #ifdef HAVE_RSB
type(psb_d_rsb_sparse_mat), target :: arsb type(psb_d_rsb_sparse_mat), target :: arsb
#endif #endif
#ifdef HAVE_GPU #ifdef HAVE_CUDA
type(psb_d_cuda_elg_sparse_mat), target :: aelg type(psb_d_cuda_elg_sparse_mat), target :: aelg
type(psb_d_cuda_csrg_sparse_mat), target :: acsrg type(psb_d_cuda_csrg_sparse_mat), target :: acsrg
#if CUDA_SHORT_VERSION <= 10 #if CUDA_SHORT_VERSION <= 10
@ -618,7 +618,7 @@ program pdgenmv
call psb_init(ctxt) call psb_init(ctxt)
call psb_info(ctxt,iam,np) call psb_info(ctxt,iam,np)
#ifdef HAVE_GPU #ifdef HAVE_CUDA
call psb_cuda_init(ctxt) call psb_cuda_init(ctxt)
#endif #endif
#ifdef HAVE_RSB #ifdef HAVE_RSB
@ -639,7 +639,7 @@ program pdgenmv
write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
write(*,*) 'This is the ',trim(name),' sample program' write(*,*) 'This is the ',trim(name),' sample program'
end if end if
#ifdef HAVE_GPU #ifdef HAVE_CUDA
write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName())
#endif #endif
@ -698,7 +698,7 @@ program pdgenmv
stop stop
end if end if
#ifdef HAVE_GPU #ifdef HAVE_CUDA
select case(psb_toupper(agfmt)) select case(psb_toupper(agfmt))
case('ELG') case('ELG')
agmold => aelg agmold => aelg
@ -753,7 +753,7 @@ program pdgenmv
call xv%bld(x0) call xv%bld(x0)
call psb_geasb(bv,desc_a,info,scratch=.true.) call psb_geasb(bv,desc_a,info,scratch=.true.)
#ifdef HAVE_GPU #ifdef HAVE_CUDA
call aux_a%cscnv(agpu,info,mold=acoo) call aux_a%cscnv(agpu,info,mold=acoo)
call xg%bld(x0,mold=vmold) call xg%bld(x0,mold=vmold)
@ -780,7 +780,7 @@ program pdgenmv
t2 = psb_wtime() - t1 t2 = psb_wtime() - t1
call psb_amx(ctxt,t2) call psb_amx(ctxt,t2)
#ifdef HAVE_GPU #ifdef HAVE_CUDA
call xg%set(x0) call xg%set(x0)
! FIXME: cache flush needed here ! FIXME: cache flush needed here
@ -870,7 +870,7 @@ program pdgenmv
tflops = flops tflops = flops
gflops = flops * ngpu gflops = flops * ngpu
write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt()
#ifdef HAVE_GPU #ifdef HAVE_CUDA
write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
& tcnvc1 & tcnvc1
@ -900,7 +900,7 @@ program pdgenmv
& t2*1.d3/(1.d0*ntests) & t2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')&
& flops/1.d6 & flops/1.d6
#ifdef HAVE_GPU #ifdef HAVE_CUDA
write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')&
& ntests, tt2 & ntests, tt2
write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')&
@ -925,7 +925,7 @@ program pdgenmv
bdwdth = ntests*nbytes/(t2*1.d6) bdwdth = ntests*nbytes/(t2*1.d6)
write(psb_out_unit,*) write(psb_out_unit,*)
write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth
#ifdef HAVE_GPU #ifdef HAVE_CUDA
bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth
bdwdth = psb_cuda_MemoryPeakBandwidth() bdwdth = psb_cuda_MemoryPeakBandwidth()
@ -949,7 +949,7 @@ program pdgenmv
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
#ifdef HAVE_GPU #ifdef HAVE_CUDA
call psb_cuda_exit() call psb_cuda_exit()
#endif #endif
call psb_exit(ctxt) call psb_exit(ctxt)

@ -547,7 +547,7 @@ program pdgenmv
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
use psb_ext_mod use psb_ext_mod
#ifdef HAVE_GPU #ifdef HAVE_CUDA
use psb_cuda_mod use psb_cuda_mod
#endif #endif
use psb_s_pde3d_mod use psb_s_pde3d_mod
@ -569,7 +569,7 @@ program pdgenmv
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
! dense matrices ! dense matrices
type(psb_s_vect_type), target :: xv,bv, xg, bg type(psb_s_vect_type), target :: xv,bv, xg, bg
#ifdef HAVE_GPU #ifdef HAVE_CUDA
type(psb_s_vect_cuda) :: vmold type(psb_s_vect_cuda) :: vmold
type(psb_i_vect_cuda) :: imold type(psb_i_vect_cuda) :: imold
#endif #endif
@ -588,7 +588,7 @@ program pdgenmv
type(psb_s_hll_sparse_mat), target :: ahll type(psb_s_hll_sparse_mat), target :: ahll
type(psb_s_dia_sparse_mat), target :: adia type(psb_s_dia_sparse_mat), target :: adia
type(psb_s_hdia_sparse_mat), target :: ahdia type(psb_s_hdia_sparse_mat), target :: ahdia
#ifdef HAVE_GPU #ifdef HAVE_CUDA
type(psb_s_cuda_elg_sparse_mat), target :: aelg type(psb_s_cuda_elg_sparse_mat), target :: aelg
type(psb_s_cuda_csrg_sparse_mat), target :: acsrg type(psb_s_cuda_csrg_sparse_mat), target :: acsrg
#if CUDA_SHORT_VERSION <= 10 #if CUDA_SHORT_VERSION <= 10
@ -612,7 +612,7 @@ program pdgenmv
call psb_init(ctxt) call psb_init(ctxt)
call psb_info(ctxt,iam,np) call psb_info(ctxt,iam,np)
#ifdef HAVE_GPU #ifdef HAVE_CUDA
call psb_cuda_init(ctxt) call psb_cuda_init(ctxt)
#endif #endif
#ifdef HAVE_RSB #ifdef HAVE_RSB
@ -633,7 +633,7 @@ program pdgenmv
write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
write(*,*) 'This is the ',trim(name),' sample program' write(*,*) 'This is the ',trim(name),' sample program'
end if end if
#ifdef HAVE_GPU #ifdef HAVE_CUDA
write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName())
#endif #endif
@ -692,7 +692,7 @@ program pdgenmv
stop stop
end if end if
#ifdef HAVE_GPU #ifdef HAVE_CUDA
select case(psb_toupper(agfmt)) select case(psb_toupper(agfmt))
case('ELG') case('ELG')
agmold => aelg agmold => aelg
@ -747,7 +747,7 @@ program pdgenmv
call xv%bld(x0) call xv%bld(x0)
call psb_geasb(bv,desc_a,info,scratch=.true.) call psb_geasb(bv,desc_a,info,scratch=.true.)
#ifdef HAVE_GPU #ifdef HAVE_CUDA
call aux_a%cscnv(agpu,info,mold=acoo) call aux_a%cscnv(agpu,info,mold=acoo)
call xg%bld(x0,mold=vmold) call xg%bld(x0,mold=vmold)
@ -774,7 +774,7 @@ program pdgenmv
t2 = psb_wtime() - t1 t2 = psb_wtime() - t1
call psb_amx(ctxt,t2) call psb_amx(ctxt,t2)
#ifdef HAVE_GPU #ifdef HAVE_CUDA
call xg%set(x0) call xg%set(x0)
! FIXME: cache flush needed here ! FIXME: cache flush needed here
@ -864,7 +864,7 @@ program pdgenmv
tflops = flops tflops = flops
gflops = flops * ngpu gflops = flops * ngpu
write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt()
#ifdef HAVE_GPU #ifdef HAVE_CUDA
write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
& tcnvc1 & tcnvc1
@ -894,7 +894,7 @@ program pdgenmv
& t2*1.d3/(1.d0*ntests) & t2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')&
& flops/1.d6 & flops/1.d6
#ifdef HAVE_GPU #ifdef HAVE_CUDA
write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')&
& ntests, tt2 & ntests, tt2
write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')&
@ -919,7 +919,7 @@ program pdgenmv
bdwdth = ntests*nbytes/(t2*1.d6) bdwdth = ntests*nbytes/(t2*1.d6)
write(psb_out_unit,*) write(psb_out_unit,*)
write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth
#ifdef HAVE_GPU #ifdef HAVE_CUDA
bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth
bdwdth = psb_cuda_MemoryPeakBandwidth() bdwdth = psb_cuda_MemoryPeakBandwidth()
@ -943,7 +943,7 @@ program pdgenmv
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
#ifdef HAVE_GPU #ifdef HAVE_CUDA
call psb_cuda_exit() call psb_cuda_exit()
#endif #endif
call psb_exit(ctxt) call psb_exit(ctxt)

Loading…
Cancel
Save