From c586e5379915f082247a5853e6b081f749167471 Mon Sep 17 00:00:00 2001 From: Fabio Durastante Date: Wed, 14 Jan 2026 15:41:23 +0100 Subject: [PATCH] Added C functions for making available GPU support from the C interface --- cbind/base/Makefile | 5 + cbind/base/psb_base_tools_cbind_mod.F90 | 48 +- cbind/base/psb_c_base.h | 9 + cbind/base/psb_c_cbase.h | 6 +- cbind/base/psb_c_dbase.h | 6 +- cbind/base/psb_c_sbase.h | 8 +- cbind/base/psb_c_serial_cbind_mod.F90 | 2 +- cbind/base/psb_c_tools_cbind_mod.F90 | 169 +++++- cbind/base/psb_c_zbase.h | 6 +- .../{psb_cpenv_mod.f90 => psb_cpenv_mod.F90} | 61 +++ cbind/base/psb_d_serial_cbind_mod.F90 | 2 +- cbind/base/psb_d_tools_cbind_mod.F90 | 179 ++++++- cbind/base/psb_s_serial_cbind_mod.F90 | 2 +- cbind/base/psb_s_tools_cbind_mod.F90 | 179 ++++++- cbind/base/psb_z_serial_cbind_mod.F90 | 2 +- cbind/base/psb_z_tools_cbind_mod.F90 | 169 +++++- cbind/psb_c_base.h | 9 + cbind/psb_c_cbase.h | 6 +- cbind/psb_c_dbase.h | 6 +- cbind/psb_c_sbase.h | 8 +- cbind/psb_c_zbase.h | 6 +- cbind/test/gpu/Makefile | 50 ++ cbind/test/gpu/gputest.c | 64 +++ cbind/test/gpu/pdegen3dc.c | 496 ++++++++++++++++++ cbind/test/gpu/runs/pdegen3d.inp | 12 + 25 files changed, 1430 insertions(+), 80 deletions(-) rename cbind/base/{psb_cpenv_mod.f90 => psb_cpenv_mod.F90} (87%) create mode 100644 cbind/test/gpu/Makefile create mode 100644 cbind/test/gpu/gputest.c create mode 100644 cbind/test/gpu/pdegen3dc.c create mode 100644 cbind/test/gpu/runs/pdegen3d.inp diff --git a/cbind/base/Makefile b/cbind/base/Makefile index 2b744829..3ddc2eb2 100644 --- a/cbind/base/Makefile +++ b/cbind/base/Makefile @@ -26,6 +26,10 @@ OBJS=$(FOBJS) $(COBJS) LIBNAME=$(CBINDLIBNAME) +# Ensure C-interoperable modules are built with CUDA definitions where available. +.F90.o: + $(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) $(FCUDEFINES) -c $< -o $@ + objs: $(OBJS) $(CMOD) /bin/cp -p *$(.mod) $(CMOD) $(HERE) @@ -58,6 +62,7 @@ psb_c_comm_cbind_mod.o psb_z_comm_cbind_mod.o: psb_base_tools_cbind_mod.o psb_ob psb_base_psblas_cbind_mod.o: psb_s_psblas_cbind_mod.o psb_d_psblas_cbind_mod.o psb_c_psblas_cbind_mod.o psb_z_psblas_cbind_mod.o psb_cpenv_mod.o: psb_base_string_cbind_mod.o psb_objhandle_mod.o + $(FC) $(FCOPT) $(FINCLUDES) $(FCUDEFINES) -c psb_cpenv_mod.F90 -o psb_cpenv_mod.o veryclean: clean /bin/rm -f $(HERE)/$(LIBNAME) diff --git a/cbind/base/psb_base_tools_cbind_mod.F90 b/cbind/base/psb_base_tools_cbind_mod.F90 index 4029bfd2..70d4c742 100644 --- a/cbind/base/psb_base_tools_cbind_mod.F90 +++ b/cbind/base/psb_base_tools_cbind_mod.F90 @@ -3,8 +3,9 @@ module psb_base_tools_cbind_mod use psb_base_mod use psb_objhandle_mod use psb_cpenv_mod - use psb_base_string_cbind_mod - +#ifdef PSB_HAVE_CUDA + use psb_cuda_mod +#endif contains ! Aggiungere funzione per estrarre comunicatore @@ -270,6 +271,49 @@ contains end function psb_c_cdasb + function psb_c_cdasb_format(cdh,format) bind(c,name='psb_c_cdasb_format') result(res) + use psb_base_string_cbind_mod, only: stringc2f + implicit none + ! Takes as input the desired format bewten CPU or GPU, and assembles accordingly + ! via the mold parameter of psb_cdasb + + integer(psb_c_ipk_) :: res + type(psb_c_object_type) :: cdh + type(psb_desc_type), pointer :: descp + integer(psb_c_ipk_) :: info + character(c_char), dimension(*) :: format + + ! Local variables + character(len=6) :: fformat + ! mold variables +#ifdef PSB_HAVE_CUDA + type(psb_i_vect_cuda), target :: ivgpu +#endif + type(psb_i_base_vect_type), target :: ivect + class(psb_i_base_vect_type), pointer :: imold + + call stringc2f(format,fformat) + + res = -1 + select case (psb_toupper(fformat)) +#ifdef PSB_HAVE_CUDA + case('GPU','DEVICE') + imold => ivgpu +#endif + case('CPU','HOST') + imold => ivect + case default + write(psb_out_unit,*) 'psb_c_cdasb_format: Unknown format ',fformat + imold => ivect + end select + + if (c_associated(cdh%item)) then + call c_f_pointer(cdh%item,descp) + call psb_cdasb(descp,info,mold=imold) + res = info + end if + + end function psb_c_cdasb_format function psb_c_cdfree(cdh) bind(c,name='psb_c_cdfree') result(res) diff --git a/cbind/base/psb_c_base.h b/cbind/base/psb_c_base.h index ec130a9f..3f04fca3 100644 --- a/cbind/base/psb_c_base.h +++ b/cbind/base/psb_c_base.h @@ -57,6 +57,13 @@ extern "C" { psb_i_t psb_c_get_index_base(); void psb_c_set_index_base(psb_i_t base); + /* GPU environment routines */ + #ifdef PSB_HAVE_CUDA + void psb_c_cuda_init(psb_c_ctxt *cctxt); + void psb_c_cuda_init_opt(psb_c_ctxt *cctxt, psb_m_t ngpu); + void psb_c_cuda_exit(); + psb_m_t psb_c_cuda_getDeviceCount(); + #endif void psb_c_mbcast(psb_c_ctxt cctxt, psb_i_t n, psb_m_t *v, psb_i_t root); void psb_c_ibcast(psb_c_ctxt cctxt, psb_i_t n, psb_i_t *v, psb_i_t root); @@ -79,12 +86,14 @@ extern "C" { psb_i_t psb_c_cdall_nl(psb_i_t nl, psb_c_ctxt cctxt, psb_c_descriptor *cd); psb_i_t psb_c_cdall_repl(psb_l_t n, psb_c_ctxt cctxt, psb_c_descriptor *cd); psb_i_t psb_c_cdasb(psb_c_descriptor *cd); + psb_i_t psb_c_cdasb_format(psb_c_descriptor *cd, const char *afmt); psb_i_t psb_c_cdfree(psb_c_descriptor *cd); psb_i_t psb_c_cdins(psb_i_t nz, const psb_l_t *ia, const psb_l_t *ja, psb_c_descriptor *cd); psb_i_t psb_c_cdins_lidx(psb_i_t nz, const psb_l_t *ja, const psb_i_t *lidx, psb_c_descriptor *cd); bool psb_c_is_owned(psb_l_t gindex, psb_c_descriptor *cd); bool psb_c_cd_is_asb(psb_c_descriptor *cd); + psb_i_t psb_c_cd_get_local_rows(psb_c_descriptor *cd); psb_i_t psb_c_cd_get_local_cols(psb_c_descriptor *cd); psb_l_t psb_c_cd_get_global_rows(psb_c_descriptor *cd); diff --git a/cbind/base/psb_c_cbase.h b/cbind/base/psb_c_cbase.h index d33307af..dcf37965 100644 --- a/cbind/base/psb_c_cbase.h +++ b/cbind/base/psb_c_cbase.h @@ -34,6 +34,8 @@ psb_i_t psb_c_cgeins_add(psb_i_t nz, const psb_l_t *irw, const psb_c_t *val, psb_c_cvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_cgeasb(psb_c_cvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_cgeasb_options(psb_c_cvector *xh, psb_c_descriptor *cdh, psb_i_t dupl); +psb_i_t psb_c_cgeasb_options_format(psb_c_cvector *xh, psb_c_descriptor *cdh, + const char *fmt, psb_i_t dupl); psb_i_t psb_c_cgefree(psb_c_cvector *xh, psb_c_descriptor *cdh); psb_c_t psb_c_cgetelem(psb_c_cvector *xh,psb_l_t index,psb_c_descriptor *cd); @@ -56,8 +58,8 @@ psb_i_t psb_c_cset_matasb(psb_c_cspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_cset_matbld(psb_c_cspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_ccopy_mat(psb_c_cspmat *ah,psb_c_cspmat *bh,psb_c_descriptor *cdh); -/* psb_i_t psb_c_cspasb_opt(psb_c_cspmat *mh, psb_c_descriptor *cdh, */ -/* const char *afmt, psb_i_t upd, psb_i_t dupl); */ +psb_i_t psb_c_cspasb_opt(psb_c_cspmat *mh, psb_c_descriptor *cdh, + const char *afmt, psb_i_t upd, psb_i_t dupl); psb_i_t psb_c_csprn(psb_c_cspmat *mh, psb_c_descriptor *cdh, _Bool clear); psb_i_t psb_c_cmat_name_print(psb_c_cspmat *mh, char *name); psb_i_t psb_c_cvect_set_scal(psb_c_cvector *xh, psb_c_t val); diff --git a/cbind/base/psb_c_dbase.h b/cbind/base/psb_c_dbase.h index c615eba1..8c2a64d3 100644 --- a/cbind/base/psb_c_dbase.h +++ b/cbind/base/psb_c_dbase.h @@ -34,6 +34,8 @@ psb_i_t psb_c_dgeins_add(psb_i_t nz, const psb_l_t *irw, const psb_d_t *val, psb_c_dvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_dgeasb(psb_c_dvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_dgeasb_options(psb_c_dvector *xh, psb_c_descriptor *cdh, psb_i_t dupl); +psb_i_t psb_c_dgeasb_options_format(psb_c_dvector *xh, psb_c_descriptor *cdh, + const char *fmt, psb_i_t dupl); psb_i_t psb_c_dgefree(psb_c_dvector *xh, psb_c_descriptor *cdh); psb_d_t psb_c_dgetelem(psb_c_dvector *xh,psb_l_t index,psb_c_descriptor *cd); @@ -56,8 +58,8 @@ psb_i_t psb_c_dset_matasb(psb_c_dspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_dset_matbld(psb_c_dspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_dcopy_mat(psb_c_dspmat *ah,psb_c_dspmat *bh,psb_c_descriptor *cdh); -/* psb_i_t psb_c_dspasb_opt(psb_c_dspmat *mh, psb_c_descriptor *cdh, */ -/* const char *afmt, psb_i_t upd, psb_i_t dupl); */ +psb_i_t psb_c_dspasb_opt(psb_c_dspmat *mh, psb_c_descriptor *cdh, + const char *afmt, psb_i_t upd, psb_i_t dupl); psb_i_t psb_c_dsprn(psb_c_dspmat *mh, psb_c_descriptor *cdh, _Bool clear); psb_i_t psb_c_dmat_name_print(psb_c_dspmat *mh, char *name); psb_i_t psb_c_dvect_set_scal(psb_c_dvector *xh, psb_d_t val); diff --git a/cbind/base/psb_c_sbase.h b/cbind/base/psb_c_sbase.h index 875cacca..f132e707 100644 --- a/cbind/base/psb_c_sbase.h +++ b/cbind/base/psb_c_sbase.h @@ -34,6 +34,8 @@ psb_i_t psb_c_sgeins_add(psb_i_t nz, const psb_l_t *irw, const psb_s_t *val, psb_c_svector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_sgeasb(psb_c_svector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_sgeasb_options(psb_c_svector *xh, psb_c_descriptor *cdh, psb_i_t dupl); +psb_i_t psb_c_sgeasb_options_format(psb_c_svector *xh, psb_c_descriptor *cdh, + const char *fmt, psb_i_t dupl); psb_i_t psb_c_sgefree(psb_c_svector *xh, psb_c_descriptor *cdh); psb_s_t psb_c_sgetelem(psb_c_svector *xh,psb_l_t index,psb_c_descriptor *cd); @@ -55,9 +57,9 @@ psb_i_t psb_c_sset_matupd(psb_c_sspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_sset_matasb(psb_c_sspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_sset_matbld(psb_c_sspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_scopy_mat(psb_c_sspmat *ah,psb_c_sspmat *bh,psb_c_descriptor *cdh); - -/* psb_i_t psb_c_sspasb_opt(psb_c_sspmat *mh, psb_c_descriptor *cdh, */ -/* const char *afmt, psb_i_t upd, psb_i_t dupl); */ + + psb_i_t psb_c_sspasb_opt(psb_c_sspmat *mh, psb_c_descriptor *cdh, + const char *afmt, psb_i_t upd, psb_i_t dupl); psb_i_t psb_c_ssprn(psb_c_sspmat *mh, psb_c_descriptor *cdh, _Bool clear); psb_i_t psb_c_smat_name_print(psb_c_sspmat *mh, char *name); psb_i_t psb_c_svect_set_scal(psb_c_svector *xh, psb_s_t val); diff --git a/cbind/base/psb_c_serial_cbind_mod.F90 b/cbind/base/psb_c_serial_cbind_mod.F90 index c0feeebd..04e21d33 100644 --- a/cbind/base/psb_c_serial_cbind_mod.F90 +++ b/cbind/base/psb_c_serial_cbind_mod.F90 @@ -2,7 +2,7 @@ module psb_c_serial_cbind_mod use iso_c_binding use psb_base_mod use psb_objhandle_mod - use psb_base_string_cbind_mod +! use psb_base_string_cbind_mod use psb_base_tools_cbind_mod contains diff --git a/cbind/base/psb_c_tools_cbind_mod.F90 b/cbind/base/psb_c_tools_cbind_mod.F90 index b3270c58..b8aedc49 100644 --- a/cbind/base/psb_c_tools_cbind_mod.F90 +++ b/cbind/base/psb_c_tools_cbind_mod.F90 @@ -3,8 +3,10 @@ module psb_c_tools_cbind_mod use psb_base_mod use psb_cpenv_mod use psb_objhandle_mod - use psb_base_string_cbind_mod use psb_base_tools_cbind_mod +#ifdef PSB_HAVE_CUDA + use psb_cuda_mod +#endif contains @@ -160,6 +162,63 @@ contains return end function psb_c_cgeasb_options + + function psb_c_cgeasb_options_format(xh,cdh,dupl,format) bind(c) result(res) + ! Takes into account format argument as a c string, and uses it to call the appropriate psb_geasb + ! with mold argument + use psb_base_string_cbind_mod, only: stringc2f + implicit none + integer(psb_c_ipk_) :: res + type(psb_c_cvector) :: xh + type(psb_c_descriptor) :: cdh + character(kind=c_char), dimension(*) :: format + integer(psb_c_ipk_), value :: dupl + + ! Local variables + character(len=6) :: fformat + type(psb_desc_type), pointer :: descp + type(psb_c_vect_type), pointer :: xp + integer(psb_c_ipk_) :: info + ! mold variables +#ifdef PSB_HAVE_CUDA + type(psb_c_vect_cuda), target :: vgpu +#endif + type(psb_c_base_vect_type), target :: vect + class(psb_c_base_vect_type), pointer :: vmold + + ! Select mold based on format + call stringc2f(format,fformat) + + select case (psb_toupper(fformat)) +#ifdef PSB_HAVE_CUDA + case('GPU','DEVICE') + vmold => vgpu +#endif + case('CPU','HOST') + vmold => vect + case default + write(psb_out_unit,*) 'psb_c_cgeasb_options_format: Unknown format ',fformat + vmold => vect + end select + res = -1 + + if (c_associated(cdh%item)) then + call c_f_pointer(cdh%item,descp) + else + return + end if + if (c_associated(xh%item)) then + call c_f_pointer(xh%item,xp) + else + return + end if + + call psb_geasb(xp,descp,info,dupl=dupl,mold=vmold) + res = min(0,info) + + return + end function psb_c_cgeasb_options_format + function psb_c_cgefree(xh,cdh) bind(c) result(res) @@ -352,48 +411,130 @@ contains end function psb_c_cspfree -#if 0 - function psb_c_cspasb_opt(mh,cdh,afmt,upd) bind(c) result(res) -#ifdef PSB_HAVE_LIBRSB + function psb_c_cspasb_opt(mh,cdh,afmt,upd,dupl) bind(c) result(res) + +#if 0 +#ifdef PSB_HAVE_LIBRSB use psb_c_rsb_mat_mod #endif +#endif +#if defined(PSB_HAVE_CUDA) + use psb_cuda_mod +#endif + use psb_ext_mod implicit none integer(psb_c_ipk_) :: res - integer(psb_c_ipk_), value :: cdh, mh,upd,dupl + type(psb_c_cspmat) :: mh + type(psb_c_descriptor) :: cdh + integer(psb_c_ipk_), value :: upd,dupl character(c_char) :: afmt(*) integer(psb_c_ipk_) :: info,n character(len=5) :: fafmt + integer(psb_ipk_), parameter :: hksz = 32 + ! mold variables +#if 0 #ifdef PSB_HAVE_LIBRSB type(psb_c_rsb_sparse_mat) :: arsb #endif +#endif + type(psb_c_ell_sparse_mat), target :: aell + type(psb_c_csr_sparse_mat), target :: acsr + type(psb_c_coo_sparse_mat), target :: acoo + type(psb_c_hll_sparse_mat), target :: ahll + type(psb_c_hdia_sparse_mat), target :: ahdia + type(psb_c_dns_sparse_mat), target :: adns +#if defined(PSB_HAVE_CUDA) + type(psb_c_cuda_hlg_sparse_mat), target :: ahlg + type(psb_c_cuda_csrg_sparse_mat), target :: acsrg + type(psb_c_cuda_elg_sparse_mat), target :: aelg +#endif + class(psb_c_base_sparse_mat), pointer :: amold + !Local variables + type(psb_desc_type), pointer :: descp + type(psb_cspmat_type), pointer :: ap res = -1 - call psb_check_descriptor_handle(cdh,info) - if (info < 0) return - call psb_check_double_spmat_handle(mh,info) - if (info < 0) return - + if (c_associated(cdh%item)) then + call c_f_pointer(cdh%item,descp) + else + return + end if + if (c_associated(mh%item)) then + call c_f_pointer(mh%item,ap) + else + return + end if call stringc2f(afmt,fafmt) + + ! Set the mold variable based on afmt + select case (psb_toupper(fafmt)) +#if defined(PSB_HAVE_CUDA) + case('ELG') + amold => aelg + case('HLG') + call psi_set_hksz(hksz) + amold => ahlg + case('CSRG') + amold => acsrg + case('ELL') + amold => aell + case('HLL') + call psi_set_hksz(hksz) + amold => ahll + case('CSR') + amold => acsr + case('DNS') + amold => adns + case default + write(*,*) 'Unknown format defaulting to HLG' + amold => ahlg + end select +#else + select case(psb_toupper(fafmt)) + case('ELL') + amold => aell + case('HLL') + call psi_set_hksz(hksz) + amold => ahll + amold => ahdia + case('CSR') + amold => acsr + case('DNS') + amold => adns + case default + write(*,*) 'Unknown format defaulting to CSR' + amold => acsr + end select +#endif + select case(fafmt) +#if 0 #ifdef PSB_HAVE_LIBRSB case('RSB') call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& & upd=upd,mold=arsb) #endif +#endif + case('ELL','HLL','CSR','DNS') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) +#if defined(PSB_HAVE_CUDA) + case('ELG','HLG','CSRG') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) +#endif case default - call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& - & afmt=fafmt,upd=upd) + write(psb_out_unit,*) 'psb_c_cspasb_opt: Unknown format ',fafmt + call psb_spasb(ap,descp,info,afmt=fafmt,upd=upd,dupl=dupl) end select res = min(0,info) return end function psb_c_cspasb_opt -#endif - function psb_c_cspins(nz,irw,icl,val,mh,cdh) bind(c) result(res) + + function psb_c_cspins(nz,irw,icl,val,mh,cdh) bind(c) result(res) implicit none integer(psb_c_ipk_) :: res diff --git a/cbind/base/psb_c_zbase.h b/cbind/base/psb_c_zbase.h index 4d67b703..40bff485 100644 --- a/cbind/base/psb_c_zbase.h +++ b/cbind/base/psb_c_zbase.h @@ -34,6 +34,8 @@ psb_i_t psb_c_zgeins_add(psb_i_t nz, const psb_l_t *irw, const psb_z_t *val, psb_c_zvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_zgeasb(psb_c_zvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_zgeasb_options(psb_c_zvector *xh, psb_c_descriptor *cdh, psb_i_t dupl); +psb_i_t psb_c_zgeasb_options_format(psb_c_zvector *xh, psb_c_descriptor *cdh, + const char *fmt, psb_i_t dupl); psb_i_t psb_c_zgefree(psb_c_zvector *xh, psb_c_descriptor *cdh); psb_z_t psb_c_zgetelem(psb_c_zvector *xh,psb_l_t index,psb_c_descriptor *cd); @@ -57,8 +59,8 @@ psb_i_t psb_c_zset_matbld(psb_c_zspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_zcopy_mat(psb_c_zspmat *ah,psb_c_zspmat *bh,psb_c_descriptor *cdh); -/* psb_i_t psb_c_zspasb_opt(psb_c_zspmat *mh, psb_c_descriptor *cdh, */ -/* const char *afmt, psb_i_t upd, psb_i_t dupl); */ +psb_i_t psb_c_zspasb_opt(psb_c_zspmat *mh, psb_c_descriptor *cdh, + const char *afmt, psb_i_t upd, psb_i_t dupl); psb_i_t psb_c_zsprn(psb_c_zspmat *mh, psb_c_descriptor *cdh, _Bool clear); psb_i_t psb_c_zmat_name_print(psb_c_zspmat *mh, char *name); psb_i_t psb_c_zvect_set_scal(psb_c_zvector *xh, psb_z_t val); diff --git a/cbind/base/psb_cpenv_mod.f90 b/cbind/base/psb_cpenv_mod.F90 similarity index 87% rename from cbind/base/psb_cpenv_mod.f90 rename to cbind/base/psb_cpenv_mod.F90 index 6bc67bfb..394b8157 100644 --- a/cbind/base/psb_cpenv_mod.f90 +++ b/cbind/base/psb_cpenv_mod.F90 @@ -50,6 +50,67 @@ contains end subroutine psb_c_init +#ifdef PSB_HAVE_CUDA + subroutine psb_c_cuda_init(cctxt) bind(c, name="psb_c_cuda_init") + use psb_base_mod, only : psb_ctxt_type + use psb_cuda_mod, only : psb_cuda_init + implicit none + + type(psb_c_object_type) :: cctxt + type(psb_ctxt_type), pointer :: ctxt + integer :: info + + if (c_associated(cctxt%item)) then + call c_f_pointer(cctxt%item,ctxt) + end if + call psb_cuda_init(ctxt) + cctxt%item = c_loc(ctxt) + end subroutine psb_c_cuda_init + + subroutine psb_c_cuda_init_opt(cctxt,cdevice) bind(c, name="psb_c_cuda_init_opt") + use psb_base_mod, only : psb_ctxt_type + use psb_cuda_mod, only : psb_cuda_init + implicit none + + type(psb_c_object_type) :: cctxt + type(psb_ctxt_type), pointer :: ctxt + integer(psb_c_mpk_), value :: cdevice + integer :: info + ! Local variables + integer(psb_mpk_) :: cdevice_f + + cdevice_f = cdevice + + if (c_associated(cctxt%item)) then + call c_f_pointer(cctxt%item,ctxt) + end if + call psb_cuda_init(ctxt,cdevice_f) + cctxt%item = c_loc(ctxt) + + end subroutine psb_c_cuda_init_opt + + subroutine psb_c_cuda_exit() bind(c, name="psb_c_cuda_exit") + use psb_cuda_mod, only : psb_cuda_exit + implicit none + + call psb_cuda_exit() + return + end subroutine psb_c_cuda_exit + + function psb_c_cuda_getDeviceCount() bind(c, name="psb_c_cuda_getDeviceCount") result(res) + use psb_cuda_mod, only : psb_cuda_getDeviceCount + implicit none + integer(psb_c_ipk_) :: res + ! Local variables + integer(psb_ipk_) :: fres + + fres = psb_cuda_getDeviceCount() + res = fres + + return + end function psb_c_cuda_getDeviceCount +#endif + ! Get MPI_Fint from C, psb_c_object_type and start a psb_ctxt_type ! context from it. subroutine psb_c_init_from_fint(cctxt,fint) bind(c) diff --git a/cbind/base/psb_d_serial_cbind_mod.F90 b/cbind/base/psb_d_serial_cbind_mod.F90 index 96a5b32c..0e71e9ee 100644 --- a/cbind/base/psb_d_serial_cbind_mod.F90 +++ b/cbind/base/psb_d_serial_cbind_mod.F90 @@ -2,7 +2,7 @@ module psb_d_serial_cbind_mod use iso_c_binding use psb_base_mod use psb_objhandle_mod - use psb_base_string_cbind_mod +! use psb_base_string_cbind_mod use psb_base_tools_cbind_mod contains diff --git a/cbind/base/psb_d_tools_cbind_mod.F90 b/cbind/base/psb_d_tools_cbind_mod.F90 index ea11b922..c70d018a 100644 --- a/cbind/base/psb_d_tools_cbind_mod.F90 +++ b/cbind/base/psb_d_tools_cbind_mod.F90 @@ -3,8 +3,10 @@ module psb_d_tools_cbind_mod use psb_base_mod use psb_cpenv_mod use psb_objhandle_mod - use psb_base_string_cbind_mod use psb_base_tools_cbind_mod +#ifdef PSB_HAVE_CUDA + use psb_cuda_mod +#endif contains @@ -160,6 +162,63 @@ contains return end function psb_c_dgeasb_options + + function psb_c_dgeasb_options_format(xh,cdh,dupl,format) bind(c) result(res) + ! Takes into account format argument as a c string, and uses it to call the appropriate psb_geasb + ! with mold argument + use psb_base_string_cbind_mod, only: stringc2f + implicit none + integer(psb_c_ipk_) :: res + type(psb_c_dvector) :: xh + type(psb_c_descriptor) :: cdh + character(kind=c_char), dimension(*) :: format + integer(psb_c_ipk_), value :: dupl + + ! Local variables + character(len=6) :: fformat + type(psb_desc_type), pointer :: descp + type(psb_d_vect_type), pointer :: xp + integer(psb_c_ipk_) :: info + ! mold variables +#ifdef PSB_HAVE_CUDA + type(psb_d_vect_cuda), target :: vgpu +#endif + type(psb_d_base_vect_type), target :: vect + class(psb_d_base_vect_type), pointer :: vmold + + ! Select mold based on format + call stringc2f(format,fformat) + + select case (psb_toupper(fformat)) +#ifdef PSB_HAVE_CUDA + case('GPU','DEVICE') + vmold => vgpu +#endif + case('CPU','HOST') + vmold => vect + case default + write(psb_out_unit,*) 'psb_c_dgeasb_options_format: Unknown format ',fformat + vmold => vect + end select + res = -1 + + if (c_associated(cdh%item)) then + call c_f_pointer(cdh%item,descp) + else + return + end if + if (c_associated(xh%item)) then + call c_f_pointer(xh%item,xp) + else + return + end if + + call psb_geasb(xp,descp,info,dupl=dupl,mold=vmold) + res = min(0,info) + + return + end function psb_c_dgeasb_options_format + function psb_c_dgefree(xh,cdh) bind(c) result(res) @@ -352,48 +411,140 @@ contains end function psb_c_dspfree -#if 0 - function psb_c_dspasb_opt(mh,cdh,afmt,upd) bind(c) result(res) -#ifdef PSB_HAVE_LIBRSB + function psb_c_dspasb_opt(mh,cdh,afmt,upd,dupl) bind(c) result(res) + +#if 0 +#ifdef PSB_HAVE_LIBRSB use psb_d_rsb_mat_mod #endif +#endif +#if defined(PSB_HAVE_CUDA) + use psb_cuda_mod +#endif + use psb_ext_mod implicit none integer(psb_c_ipk_) :: res - integer(psb_c_ipk_), value :: cdh, mh,upd,dupl + type(psb_c_dspmat) :: mh + type(psb_c_descriptor) :: cdh + integer(psb_c_ipk_), value :: upd,dupl character(c_char) :: afmt(*) integer(psb_c_ipk_) :: info,n character(len=5) :: fafmt + integer(psb_ipk_), parameter :: hksz = 32 + ! mold variables +#if 0 #ifdef PSB_HAVE_LIBRSB type(psb_d_rsb_sparse_mat) :: arsb #endif +#endif + type(psb_d_ell_sparse_mat), target :: aell + type(psb_d_csr_sparse_mat), target :: acsr + type(psb_d_coo_sparse_mat), target :: acoo + type(psb_d_hll_sparse_mat), target :: ahll + type(psb_d_hdia_sparse_mat), target :: ahdia + type(psb_d_dns_sparse_mat), target :: adns +#if defined(PSB_HAVE_CUDA) + type(psb_d_cuda_hlg_sparse_mat), target :: ahlg + type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_d_cuda_csrg_sparse_mat), target :: acsrg + type(psb_d_cuda_elg_sparse_mat), target :: aelg +#endif + class(psb_d_base_sparse_mat), pointer :: amold + !Local variables + type(psb_desc_type), pointer :: descp + type(psb_dspmat_type), pointer :: ap res = -1 - call psb_check_descriptor_handle(cdh,info) - if (info < 0) return - call psb_check_double_spmat_handle(mh,info) - if (info < 0) return - + if (c_associated(cdh%item)) then + call c_f_pointer(cdh%item,descp) + else + return + end if + if (c_associated(mh%item)) then + call c_f_pointer(mh%item,ap) + else + return + end if call stringc2f(afmt,fafmt) + + ! Set the mold variable based on afmt + select case (psb_toupper(fafmt)) +#if defined(PSB_HAVE_CUDA) + case('ELG') + amold => aelg + case('HLG') + call psi_set_hksz(hksz) + amold => ahlg + case('HDIAG') + amold => ahdiag + case('CSRG') + amold => acsrg + case('ELL') + amold => aell + case('HLL') + call psi_set_hksz(hksz) + amold => ahll + case('HDIA') + amold => ahdia + case('CSR') + amold => acsr + case('DNS') + amold => adns + case default + write(*,*) 'Unknown format defaulting to HLG' + amold => ahlg + end select +#else + select case(psb_toupper(fafmt)) + case('ELL') + amold => aell + case('HLL') + call psi_set_hksz(hksz) + amold => ahll + case('HDIA') + amold => ahdia + case('CSR') + amold => acsr + case('DNS') + amold => adns + case default + write(*,*) 'Unknown format defaulting to CSR' + amold => acsr + end select +#endif + select case(fafmt) +#if 0 #ifdef PSB_HAVE_LIBRSB case('RSB') call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& & upd=upd,mold=arsb) #endif +#endif + case('ELL','HLL','CSR','DNS') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) + case('HDIA') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) +#if defined(PSB_HAVE_CUDA) + case('ELG','HLG','CSRG') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) + case('HDIAG') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) +#endif case default - call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& - & afmt=fafmt,upd=upd) + write(psb_out_unit,*) 'psb_c_dspasb_opt: Unknown format ',fafmt + call psb_spasb(ap,descp,info,afmt=fafmt,upd=upd,dupl=dupl) end select res = min(0,info) return end function psb_c_dspasb_opt -#endif - function psb_c_dspins(nz,irw,icl,val,mh,cdh) bind(c) result(res) + + function psb_c_dspins(nz,irw,icl,val,mh,cdh) bind(c) result(res) implicit none integer(psb_c_ipk_) :: res diff --git a/cbind/base/psb_s_serial_cbind_mod.F90 b/cbind/base/psb_s_serial_cbind_mod.F90 index 0f1e6dce..2610e883 100644 --- a/cbind/base/psb_s_serial_cbind_mod.F90 +++ b/cbind/base/psb_s_serial_cbind_mod.F90 @@ -2,7 +2,7 @@ module psb_s_serial_cbind_mod use iso_c_binding use psb_base_mod use psb_objhandle_mod - use psb_base_string_cbind_mod +! use psb_base_string_cbind_mod use psb_base_tools_cbind_mod contains diff --git a/cbind/base/psb_s_tools_cbind_mod.F90 b/cbind/base/psb_s_tools_cbind_mod.F90 index 0b25a840..e93bb35f 100644 --- a/cbind/base/psb_s_tools_cbind_mod.F90 +++ b/cbind/base/psb_s_tools_cbind_mod.F90 @@ -3,8 +3,10 @@ module psb_s_tools_cbind_mod use psb_base_mod use psb_cpenv_mod use psb_objhandle_mod - use psb_base_string_cbind_mod use psb_base_tools_cbind_mod +#ifdef PSB_HAVE_CUDA + use psb_cuda_mod +#endif contains @@ -160,6 +162,63 @@ contains return end function psb_c_sgeasb_options + + function psb_c_sgeasb_options_format(xh,cdh,dupl,format) bind(c) result(res) + ! Takes into account format argument as a c string, and uses it to call the appropriate psb_geasb + ! with mold argument + use psb_base_string_cbind_mod, only: stringc2f + implicit none + integer(psb_c_ipk_) :: res + type(psb_c_svector) :: xh + type(psb_c_descriptor) :: cdh + character(kind=c_char), dimension(*) :: format + integer(psb_c_ipk_), value :: dupl + + ! Local variables + character(len=6) :: fformat + type(psb_desc_type), pointer :: descp + type(psb_s_vect_type), pointer :: xp + integer(psb_c_ipk_) :: info + ! mold variables +#ifdef PSB_HAVE_CUDA + type(psb_s_vect_cuda), target :: vgpu +#endif + type(psb_s_base_vect_type), target :: vect + class(psb_s_base_vect_type), pointer :: vmold + + ! Select mold based on format + call stringc2f(format,fformat) + + select case (psb_toupper(fformat)) +#ifdef PSB_HAVE_CUDA + case('GPU','DEVICE') + vmold => vgpu +#endif + case('CPU','HOST') + vmold => vect + case default + write(psb_out_unit,*) 'psb_c_sgeasb_options_format: Unknown format ',fformat + vmold => vect + end select + res = -1 + + if (c_associated(cdh%item)) then + call c_f_pointer(cdh%item,descp) + else + return + end if + if (c_associated(xh%item)) then + call c_f_pointer(xh%item,xp) + else + return + end if + + call psb_geasb(xp,descp,info,dupl=dupl,mold=vmold) + res = min(0,info) + + return + end function psb_c_sgeasb_options_format + function psb_c_sgefree(xh,cdh) bind(c) result(res) @@ -352,48 +411,140 @@ contains end function psb_c_sspfree -#if 0 - function psb_c_sspasb_opt(mh,cdh,afmt,upd) bind(c) result(res) -#ifdef PSB_HAVE_LIBRSB + function psb_c_sspasb_opt(mh,cdh,afmt,upd,dupl) bind(c) result(res) + +#if 0 +#ifdef PSB_HAVE_LIBRSB use psb_s_rsb_mat_mod #endif +#endif +#if defined(PSB_HAVE_CUDA) + use psb_cuda_mod +#endif + use psb_ext_mod implicit none integer(psb_c_ipk_) :: res - integer(psb_c_ipk_), value :: cdh, mh,upd,dupl + type(psb_c_sspmat) :: mh + type(psb_c_descriptor) :: cdh + integer(psb_c_ipk_), value :: upd,dupl character(c_char) :: afmt(*) integer(psb_c_ipk_) :: info,n character(len=5) :: fafmt + integer(psb_ipk_), parameter :: hksz = 32 + ! mold variables +#if 0 #ifdef PSB_HAVE_LIBRSB type(psb_s_rsb_sparse_mat) :: arsb #endif +#endif + type(psb_s_ell_sparse_mat), target :: aell + type(psb_s_csr_sparse_mat), target :: acsr + type(psb_s_coo_sparse_mat), target :: acoo + type(psb_s_hll_sparse_mat), target :: ahll + type(psb_s_hdia_sparse_mat), target :: ahdia + type(psb_s_dns_sparse_mat), target :: adns +#if defined(PSB_HAVE_CUDA) + type(psb_s_cuda_hlg_sparse_mat), target :: ahlg + type(psb_s_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_s_cuda_csrg_sparse_mat), target :: acsrg + type(psb_s_cuda_elg_sparse_mat), target :: aelg +#endif + class(psb_s_base_sparse_mat), pointer :: amold + !Local variables + type(psb_desc_type), pointer :: descp + type(psb_sspmat_type), pointer :: ap res = -1 - call psb_check_descriptor_handle(cdh,info) - if (info < 0) return - call psb_check_double_spmat_handle(mh,info) - if (info < 0) return - + if (c_associated(cdh%item)) then + call c_f_pointer(cdh%item,descp) + else + return + end if + if (c_associated(mh%item)) then + call c_f_pointer(mh%item,ap) + else + return + end if call stringc2f(afmt,fafmt) + + ! Set the mold variable based on afmt + select case (psb_toupper(fafmt)) +#if defined(PSB_HAVE_CUDA) + case('ELG') + amold => aelg + case('HLG') + call psi_set_hksz(hksz) + amold => ahlg + case('HDIAG') + amold => ahdiag + case('CSRG') + amold => acsrg + case('ELL') + amold => aell + case('HLL') + call psi_set_hksz(hksz) + amold => ahll + case('HDIA') + amold => ahdia + case('CSR') + amold => acsr + case('DNS') + amold => adns + case default + write(*,*) 'Unknown format defaulting to HLG' + amold => ahlg + end select +#else + select case(psb_toupper(fafmt)) + case('ELL') + amold => aell + case('HLL') + call psi_set_hksz(hksz) + amold => ahll + case('HDIA') + amold => ahdia + case('CSR') + amold => acsr + case('DNS') + amold => adns + case default + write(*,*) 'Unknown format defaulting to CSR' + amold => acsr + end select +#endif + select case(fafmt) +#if 0 #ifdef PSB_HAVE_LIBRSB case('RSB') call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& & upd=upd,mold=arsb) #endif +#endif + case('ELL','HLL','CSR','DNS') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) + case('HDIA') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) +#if defined(PSB_HAVE_CUDA) + case('ELG','HLG','CSRG') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) + case('HDIAG') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) +#endif case default - call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& - & afmt=fafmt,upd=upd) + write(psb_out_unit,*) 'psb_c_sspasb_opt: Unknown format ',fafmt + call psb_spasb(ap,descp,info,afmt=fafmt,upd=upd,dupl=dupl) end select res = min(0,info) return end function psb_c_sspasb_opt -#endif - function psb_c_sspins(nz,irw,icl,val,mh,cdh) bind(c) result(res) + + function psb_c_sspins(nz,irw,icl,val,mh,cdh) bind(c) result(res) implicit none integer(psb_c_ipk_) :: res diff --git a/cbind/base/psb_z_serial_cbind_mod.F90 b/cbind/base/psb_z_serial_cbind_mod.F90 index 742f7d93..aad86746 100644 --- a/cbind/base/psb_z_serial_cbind_mod.F90 +++ b/cbind/base/psb_z_serial_cbind_mod.F90 @@ -2,7 +2,7 @@ module psb_z_serial_cbind_mod use iso_c_binding use psb_base_mod use psb_objhandle_mod - use psb_base_string_cbind_mod +! use psb_base_string_cbind_mod use psb_base_tools_cbind_mod contains diff --git a/cbind/base/psb_z_tools_cbind_mod.F90 b/cbind/base/psb_z_tools_cbind_mod.F90 index 41c170eb..47aae520 100644 --- a/cbind/base/psb_z_tools_cbind_mod.F90 +++ b/cbind/base/psb_z_tools_cbind_mod.F90 @@ -3,8 +3,10 @@ module psb_z_tools_cbind_mod use psb_base_mod use psb_cpenv_mod use psb_objhandle_mod - use psb_base_string_cbind_mod use psb_base_tools_cbind_mod +#ifdef PSB_HAVE_CUDA + use psb_cuda_mod +#endif contains @@ -160,6 +162,63 @@ contains return end function psb_c_zgeasb_options + + function psb_c_zgeasb_options_format(xh,cdh,dupl,format) bind(c) result(res) + ! Takes into account format argument as a c string, and uses it to call the appropriate psb_geasb + ! with mold argument + use psb_base_string_cbind_mod, only: stringc2f + implicit none + integer(psb_c_ipk_) :: res + type(psb_c_zvector) :: xh + type(psb_c_descriptor) :: cdh + character(kind=c_char), dimension(*) :: format + integer(psb_c_ipk_), value :: dupl + + ! Local variables + character(len=6) :: fformat + type(psb_desc_type), pointer :: descp + type(psb_z_vect_type), pointer :: xp + integer(psb_c_ipk_) :: info + ! mold variables +#ifdef PSB_HAVE_CUDA + type(psb_z_vect_cuda), target :: vgpu +#endif + type(psb_z_base_vect_type), target :: vect + class(psb_z_base_vect_type), pointer :: vmold + + ! Select mold based on format + call stringc2f(format,fformat) + + select case (psb_toupper(fformat)) +#ifdef PSB_HAVE_CUDA + case('GPU','DEVICE') + vmold => vgpu +#endif + case('CPU','HOST') + vmold => vect + case default + write(psb_out_unit,*) 'psb_c_zgeasb_options_format: Unknown format ',fformat + vmold => vect + end select + res = -1 + + if (c_associated(cdh%item)) then + call c_f_pointer(cdh%item,descp) + else + return + end if + if (c_associated(xh%item)) then + call c_f_pointer(xh%item,xp) + else + return + end if + + call psb_geasb(xp,descp,info,dupl=dupl,mold=vmold) + res = min(0,info) + + return + end function psb_c_zgeasb_options_format + function psb_c_zgefree(xh,cdh) bind(c) result(res) @@ -352,48 +411,130 @@ contains end function psb_c_zspfree -#if 0 - function psb_c_zspasb_opt(mh,cdh,afmt,upd) bind(c) result(res) -#ifdef PSB_HAVE_LIBRSB + function psb_c_zspasb_opt(mh,cdh,afmt,upd,dupl) bind(c) result(res) + +#if 0 +#ifdef PSB_HAVE_LIBRSB use psb_z_rsb_mat_mod #endif +#endif +#if defined(PSB_HAVE_CUDA) + use psb_cuda_mod +#endif + use psb_ext_mod implicit none integer(psb_c_ipk_) :: res - integer(psb_c_ipk_), value :: cdh, mh,upd,dupl + type(psb_c_zspmat) :: mh + type(psb_c_descriptor) :: cdh + integer(psb_c_ipk_), value :: upd,dupl character(c_char) :: afmt(*) integer(psb_c_ipk_) :: info,n character(len=5) :: fafmt + integer(psb_ipk_), parameter :: hksz = 32 + ! mold variables +#if 0 #ifdef PSB_HAVE_LIBRSB type(psb_z_rsb_sparse_mat) :: arsb #endif +#endif + type(psb_z_ell_sparse_mat), target :: aell + type(psb_z_csr_sparse_mat), target :: acsr + type(psb_z_coo_sparse_mat), target :: acoo + type(psb_z_hll_sparse_mat), target :: ahll + type(psb_z_hdia_sparse_mat), target :: ahdia + type(psb_z_dns_sparse_mat), target :: adns +#if defined(PSB_HAVE_CUDA) + type(psb_z_cuda_hlg_sparse_mat), target :: ahlg + type(psb_z_cuda_csrg_sparse_mat), target :: acsrg + type(psb_z_cuda_elg_sparse_mat), target :: aelg +#endif + class(psb_z_base_sparse_mat), pointer :: amold + !Local variables + type(psb_desc_type), pointer :: descp + type(psb_zspmat_type), pointer :: ap res = -1 - call psb_check_descriptor_handle(cdh,info) - if (info < 0) return - call psb_check_double_spmat_handle(mh,info) - if (info < 0) return - + if (c_associated(cdh%item)) then + call c_f_pointer(cdh%item,descp) + else + return + end if + if (c_associated(mh%item)) then + call c_f_pointer(mh%item,ap) + else + return + end if call stringc2f(afmt,fafmt) + + ! Set the mold variable based on afmt + select case (psb_toupper(fafmt)) +#if defined(PSB_HAVE_CUDA) + case('ELG') + amold => aelg + case('HLG') + call psi_set_hksz(hksz) + amold => ahlg + case('CSRG') + amold => acsrg + case('ELL') + amold => aell + case('HLL') + call psi_set_hksz(hksz) + amold => ahll + case('CSR') + amold => acsr + case('DNS') + amold => adns + case default + write(*,*) 'Unknown format defaulting to HLG' + amold => ahlg + end select +#else + select case(psb_toupper(fafmt)) + case('ELL') + amold => aell + case('HLL') + call psi_set_hksz(hksz) + amold => ahll + amold => ahdia + case('CSR') + amold => acsr + case('DNS') + amold => adns + case default + write(*,*) 'Unknown format defaulting to CSR' + amold => acsr + end select +#endif + select case(fafmt) +#if 0 #ifdef PSB_HAVE_LIBRSB case('RSB') call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& & upd=upd,mold=arsb) #endif +#endif + case('ELL','HLL','CSR','DNS') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) +#if defined(PSB_HAVE_CUDA) + case('ELG','HLG','CSRG') + call psb_spasb(ap,descp,info,upd=upd,mold=amold) +#endif case default - call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& - & afmt=fafmt,upd=upd) + write(psb_out_unit,*) 'psb_c_zspasb_opt: Unknown format ',fafmt + call psb_spasb(ap,descp,info,afmt=fafmt,upd=upd,dupl=dupl) end select res = min(0,info) return end function psb_c_zspasb_opt -#endif - function psb_c_zspins(nz,irw,icl,val,mh,cdh) bind(c) result(res) + + function psb_c_zspins(nz,irw,icl,val,mh,cdh) bind(c) result(res) implicit none integer(psb_c_ipk_) :: res diff --git a/cbind/psb_c_base.h b/cbind/psb_c_base.h index ec130a9f..3f04fca3 100644 --- a/cbind/psb_c_base.h +++ b/cbind/psb_c_base.h @@ -57,6 +57,13 @@ extern "C" { psb_i_t psb_c_get_index_base(); void psb_c_set_index_base(psb_i_t base); + /* GPU environment routines */ + #ifdef PSB_HAVE_CUDA + void psb_c_cuda_init(psb_c_ctxt *cctxt); + void psb_c_cuda_init_opt(psb_c_ctxt *cctxt, psb_m_t ngpu); + void psb_c_cuda_exit(); + psb_m_t psb_c_cuda_getDeviceCount(); + #endif void psb_c_mbcast(psb_c_ctxt cctxt, psb_i_t n, psb_m_t *v, psb_i_t root); void psb_c_ibcast(psb_c_ctxt cctxt, psb_i_t n, psb_i_t *v, psb_i_t root); @@ -79,12 +86,14 @@ extern "C" { psb_i_t psb_c_cdall_nl(psb_i_t nl, psb_c_ctxt cctxt, psb_c_descriptor *cd); psb_i_t psb_c_cdall_repl(psb_l_t n, psb_c_ctxt cctxt, psb_c_descriptor *cd); psb_i_t psb_c_cdasb(psb_c_descriptor *cd); + psb_i_t psb_c_cdasb_format(psb_c_descriptor *cd, const char *afmt); psb_i_t psb_c_cdfree(psb_c_descriptor *cd); psb_i_t psb_c_cdins(psb_i_t nz, const psb_l_t *ia, const psb_l_t *ja, psb_c_descriptor *cd); psb_i_t psb_c_cdins_lidx(psb_i_t nz, const psb_l_t *ja, const psb_i_t *lidx, psb_c_descriptor *cd); bool psb_c_is_owned(psb_l_t gindex, psb_c_descriptor *cd); bool psb_c_cd_is_asb(psb_c_descriptor *cd); + psb_i_t psb_c_cd_get_local_rows(psb_c_descriptor *cd); psb_i_t psb_c_cd_get_local_cols(psb_c_descriptor *cd); psb_l_t psb_c_cd_get_global_rows(psb_c_descriptor *cd); diff --git a/cbind/psb_c_cbase.h b/cbind/psb_c_cbase.h index d33307af..dcf37965 100644 --- a/cbind/psb_c_cbase.h +++ b/cbind/psb_c_cbase.h @@ -34,6 +34,8 @@ psb_i_t psb_c_cgeins_add(psb_i_t nz, const psb_l_t *irw, const psb_c_t *val, psb_c_cvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_cgeasb(psb_c_cvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_cgeasb_options(psb_c_cvector *xh, psb_c_descriptor *cdh, psb_i_t dupl); +psb_i_t psb_c_cgeasb_options_format(psb_c_cvector *xh, psb_c_descriptor *cdh, + const char *fmt, psb_i_t dupl); psb_i_t psb_c_cgefree(psb_c_cvector *xh, psb_c_descriptor *cdh); psb_c_t psb_c_cgetelem(psb_c_cvector *xh,psb_l_t index,psb_c_descriptor *cd); @@ -56,8 +58,8 @@ psb_i_t psb_c_cset_matasb(psb_c_cspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_cset_matbld(psb_c_cspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_ccopy_mat(psb_c_cspmat *ah,psb_c_cspmat *bh,psb_c_descriptor *cdh); -/* psb_i_t psb_c_cspasb_opt(psb_c_cspmat *mh, psb_c_descriptor *cdh, */ -/* const char *afmt, psb_i_t upd, psb_i_t dupl); */ +psb_i_t psb_c_cspasb_opt(psb_c_cspmat *mh, psb_c_descriptor *cdh, + const char *afmt, psb_i_t upd, psb_i_t dupl); psb_i_t psb_c_csprn(psb_c_cspmat *mh, psb_c_descriptor *cdh, _Bool clear); psb_i_t psb_c_cmat_name_print(psb_c_cspmat *mh, char *name); psb_i_t psb_c_cvect_set_scal(psb_c_cvector *xh, psb_c_t val); diff --git a/cbind/psb_c_dbase.h b/cbind/psb_c_dbase.h index c615eba1..8c2a64d3 100644 --- a/cbind/psb_c_dbase.h +++ b/cbind/psb_c_dbase.h @@ -34,6 +34,8 @@ psb_i_t psb_c_dgeins_add(psb_i_t nz, const psb_l_t *irw, const psb_d_t *val, psb_c_dvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_dgeasb(psb_c_dvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_dgeasb_options(psb_c_dvector *xh, psb_c_descriptor *cdh, psb_i_t dupl); +psb_i_t psb_c_dgeasb_options_format(psb_c_dvector *xh, psb_c_descriptor *cdh, + const char *fmt, psb_i_t dupl); psb_i_t psb_c_dgefree(psb_c_dvector *xh, psb_c_descriptor *cdh); psb_d_t psb_c_dgetelem(psb_c_dvector *xh,psb_l_t index,psb_c_descriptor *cd); @@ -56,8 +58,8 @@ psb_i_t psb_c_dset_matasb(psb_c_dspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_dset_matbld(psb_c_dspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_dcopy_mat(psb_c_dspmat *ah,psb_c_dspmat *bh,psb_c_descriptor *cdh); -/* psb_i_t psb_c_dspasb_opt(psb_c_dspmat *mh, psb_c_descriptor *cdh, */ -/* const char *afmt, psb_i_t upd, psb_i_t dupl); */ +psb_i_t psb_c_dspasb_opt(psb_c_dspmat *mh, psb_c_descriptor *cdh, + const char *afmt, psb_i_t upd, psb_i_t dupl); psb_i_t psb_c_dsprn(psb_c_dspmat *mh, psb_c_descriptor *cdh, _Bool clear); psb_i_t psb_c_dmat_name_print(psb_c_dspmat *mh, char *name); psb_i_t psb_c_dvect_set_scal(psb_c_dvector *xh, psb_d_t val); diff --git a/cbind/psb_c_sbase.h b/cbind/psb_c_sbase.h index 875cacca..f132e707 100644 --- a/cbind/psb_c_sbase.h +++ b/cbind/psb_c_sbase.h @@ -34,6 +34,8 @@ psb_i_t psb_c_sgeins_add(psb_i_t nz, const psb_l_t *irw, const psb_s_t *val, psb_c_svector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_sgeasb(psb_c_svector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_sgeasb_options(psb_c_svector *xh, psb_c_descriptor *cdh, psb_i_t dupl); +psb_i_t psb_c_sgeasb_options_format(psb_c_svector *xh, psb_c_descriptor *cdh, + const char *fmt, psb_i_t dupl); psb_i_t psb_c_sgefree(psb_c_svector *xh, psb_c_descriptor *cdh); psb_s_t psb_c_sgetelem(psb_c_svector *xh,psb_l_t index,psb_c_descriptor *cd); @@ -55,9 +57,9 @@ psb_i_t psb_c_sset_matupd(psb_c_sspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_sset_matasb(psb_c_sspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_sset_matbld(psb_c_sspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_scopy_mat(psb_c_sspmat *ah,psb_c_sspmat *bh,psb_c_descriptor *cdh); - -/* psb_i_t psb_c_sspasb_opt(psb_c_sspmat *mh, psb_c_descriptor *cdh, */ -/* const char *afmt, psb_i_t upd, psb_i_t dupl); */ + + psb_i_t psb_c_sspasb_opt(psb_c_sspmat *mh, psb_c_descriptor *cdh, + const char *afmt, psb_i_t upd, psb_i_t dupl); psb_i_t psb_c_ssprn(psb_c_sspmat *mh, psb_c_descriptor *cdh, _Bool clear); psb_i_t psb_c_smat_name_print(psb_c_sspmat *mh, char *name); psb_i_t psb_c_svect_set_scal(psb_c_svector *xh, psb_s_t val); diff --git a/cbind/psb_c_zbase.h b/cbind/psb_c_zbase.h index 4d67b703..40bff485 100644 --- a/cbind/psb_c_zbase.h +++ b/cbind/psb_c_zbase.h @@ -34,6 +34,8 @@ psb_i_t psb_c_zgeins_add(psb_i_t nz, const psb_l_t *irw, const psb_z_t *val, psb_c_zvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_zgeasb(psb_c_zvector *xh, psb_c_descriptor *cdh); psb_i_t psb_c_zgeasb_options(psb_c_zvector *xh, psb_c_descriptor *cdh, psb_i_t dupl); +psb_i_t psb_c_zgeasb_options_format(psb_c_zvector *xh, psb_c_descriptor *cdh, + const char *fmt, psb_i_t dupl); psb_i_t psb_c_zgefree(psb_c_zvector *xh, psb_c_descriptor *cdh); psb_z_t psb_c_zgetelem(psb_c_zvector *xh,psb_l_t index,psb_c_descriptor *cd); @@ -57,8 +59,8 @@ psb_i_t psb_c_zset_matbld(psb_c_zspmat *mh,psb_c_descriptor *cdh); psb_i_t psb_c_zcopy_mat(psb_c_zspmat *ah,psb_c_zspmat *bh,psb_c_descriptor *cdh); -/* psb_i_t psb_c_zspasb_opt(psb_c_zspmat *mh, psb_c_descriptor *cdh, */ -/* const char *afmt, psb_i_t upd, psb_i_t dupl); */ +psb_i_t psb_c_zspasb_opt(psb_c_zspmat *mh, psb_c_descriptor *cdh, + const char *afmt, psb_i_t upd, psb_i_t dupl); psb_i_t psb_c_zsprn(psb_c_zspmat *mh, psb_c_descriptor *cdh, _Bool clear); psb_i_t psb_c_zmat_name_print(psb_c_zspmat *mh, char *name); psb_i_t psb_c_zvect_set_scal(psb_c_zvector *xh, psb_z_t val); diff --git a/cbind/test/gpu/Makefile b/cbind/test/gpu/Makefile new file mode 100644 index 00000000..7c0ae80e --- /dev/null +++ b/cbind/test/gpu/Makefile @@ -0,0 +1,50 @@ +TOP=../../.. +include $(TOP)/Make.inc +LIBDIR=$(TOP)/lib +INCLUDEDIR=$(TOP)/include +MODDIR=$(TOP)/modules/ +HERE=../.. + +FINCLUDES=$(FMFLAG). $(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR) $(CUDA_INCLUDES) +CINCLUDES=-I. -I$(HERE) -I$(INCLUDEDIR) $(CUDA_INCLUDES) + +PSBC_LIBS= -L$(LIBDIR) -lpsb_cbind +# PSB_LIBS=-lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -L$(LIBDIR) $(PSBGPULDLIBS) +PSB_LIBS=-lpsb_ext -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -L$(LIBDIR) $(PSBGPULDLIBS) + + +# +# Compilers and such +# + +EXEDIR=./runs + +all: gputest pdedgen3dc + +gputest: gputest.o + $(FLINK) gputest.o -o gputest $(PSBC_LIBS) \ + $(PSB_LIBS) \ + -lm + /bin/mv gputest $(EXEDIR) + +pdedgen3dc: pdegen3dc.o + $(FLINK) pdegen3dc.o -o pdedgen3dc $(PSBC_LIBS) $(PSB_LIBS) -lstdc++ -lm + /bin/mv pdedgen3dc $(EXEDIR) + +.f90.o: + $(MPFC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< +.c.o: + $(MPCC) $(CCOPT) $(CINCLUDES) $(CDEFINES) -c $< + + +clean: + /bin/rm -f gputest.o $(EXEDIR)/gputest + /bin/rm -f pdedgen3dc.o $(EXEDIR)/pdedgen3dc +verycleanlib: + (cd ../..; make veryclean) +lib: + (cd ../../; make library) + +tests: all + cd runs ; ./gputest + cd runs ; ./pdedgen3dc < runs/pdegen3d.inp > pdegen3d.out \ No newline at end of file diff --git a/cbind/test/gpu/gputest.c b/cbind/test/gpu/gputest.c new file mode 100644 index 00000000..5c1e5a0a --- /dev/null +++ b/cbind/test/gpu/gputest.c @@ -0,0 +1,64 @@ +/*----------------------------------------------------------------------------------*/ +/* Parallel Sparse BLAS v 3.9.0 */ +/* (C) Copyright 2017 Salvatore Filippone */ +/* */ +/* Redistribution and use in source and binary forms, with or without */ +/* modification, are permitted provided that the following conditions */ +/* are met: */ +/* 1. Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* 2. Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions, and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution. */ +/* 3. The name of the PSBLAS group or the names of its contributors may */ +/* not be used to endorse or promote products derived from this */ +/* software without specific written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ +/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ +/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ +/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ +/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ +/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ +/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* File: gputest.c */ + +#include +#include +#include +#include + +#include "psb_base_cbind.h" + +int main(int argc, char **argv) +{ + psb_c_ctxt *cctxt; + psb_i_t iam, np; + + // Initialize PSBLAS context + cctxt = psb_c_new_ctxt(); + psb_c_init(cctxt); + // Initialze GPU support +#ifdef PSB_HAVE_CUDA + psb_c_cuda_init(cctxt); +#endif + + psb_c_info(*cctxt, &iam, &np); + printf("Hello from process %d of %d\n", iam, np); +#ifdef PSB_HAVE_CUDA + printf("Number of available GPU devices: %d seen by process %d\n", psb_c_cuda_getDeviceCount(), iam); +#endif + + // Exit from GPU support +#ifdef PSB_HAVE_CUDA + psb_c_cuda_exit(); +#endif + // Finalize PSBLAS context + psb_c_exit(*cctxt); + free(cctxt); +} \ No newline at end of file diff --git a/cbind/test/gpu/pdegen3dc.c b/cbind/test/gpu/pdegen3dc.c new file mode 100644 index 00000000..3096b2b7 --- /dev/null +++ b/cbind/test/gpu/pdegen3dc.c @@ -0,0 +1,496 @@ +/*----------------------------------------------------------------------------------*/ +/* Parallel Sparse BLAS v 3.5.0 */ +/* (C) Copyright 2017 Salvatore Filippone */ +/* */ +/* Redistribution and use in source and binary forms, with or without */ +/* modification, are permitted provided that the following conditions */ +/* are met: */ +/* 1. Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* 2. Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions, and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution. */ +/* 3. The name of the PSBLAS group or the names of its contributors may */ +/* not be used to endorse or promote products derived from this */ +/* software without specific written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ +/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ +/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ +/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ +/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ +/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ +/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* */ +/* File: pdegen3dc.c */ +/* */ +/* Program: pdegen3dc */ +/* This sample program shows how to build and solve a sparse linear */ +/* */ +/* The program solves a linear system based on the partial differential */ +/* equation */ +/* */ +/* */ +/* */ +/* The equation generated is */ +/* */ +/* b1 d d (u) b2 d d (u) a1 d (u)) a2 d (u))) */ +/* - ------ - ------ + ----- + ------ + a3 u = 0 */ +/* dx dx dy dy dx dy */ +/* */ +/* */ +/* with Dirichlet boundary conditions on the unit cube */ +/* */ +/* 0<=x,y,z<=1 */ +/* */ +/* The equation is discretized with finite differences and uniform stepsize; */ +/* the resulting discrete equation is */ +/* */ +/* ( u(x,y,z)(2b1+2b2+a1+a2)+u(x-1,y)(-b1-a1)+u(x,y-1)(-b2-a2)+ */ +/* -u(x+1,y)b1-u(x,y+1)b2)*(1/h**2) */ +/* */ +/* Example adapted from: C.T.Kelley */ +/* Iterative Methods for Linear and Nonlinear Equations */ +/* SIAM 1995 */ +/* */ +/* */ +/* In this sample program the index space of the discretized */ +/* computational domain is first numbered sequentially in a standard way, */ +/* then the corresponding vector is distributed according to an HPF BLOCK */ +/* distribution directive. The discretization ensures there are IDIM */ +/* *internal* points in each direction. */ +/* */ +/*----------------------------------------------------------------------------------*/ + +#include +#include +#include +#include + +#include "psb_base_cbind.h" +#include "psb_prec_cbind.h" +#include "psb_linsolve_cbind.h" + +#define LINEBUFSIZE 1024 +#define NBMAX 20 +#define DUMPMATRIX 0 + +double a1(double x, double y, double z) +{ + return (1.0 / 80.0); +} +double a2(double x, double y, double z) +{ + return (1.0 / 80.0); +} +double a3(double x, double y, double z) +{ + return (1.0 / 80.0); +} +double c(double x, double y, double z) +{ + return (0.0); +} +double b1(double x, double y, double z) +{ + return (0.0 / sqrt(3.0)); +} +double b2(double x, double y, double z) +{ + return (0.0 / sqrt(3.0)); +} +double b3(double x, double y, double z) +{ + return (0.0 / sqrt(3.0)); +} + +double g(double x, double y, double z) +{ + if (x == 1.0) + { + return (1.0); + } + else if (x == 0.0) + { + return (exp(-y * y - z * z)); + } + else + { + return (0.0); + } +} + +psb_i_t matgen(psb_c_ctxt cctxt, psb_i_t nl, psb_i_t idim, psb_l_t vl[], + psb_c_dspmat *ah, const char *afmt, + psb_c_descriptor *cdh, const char *cdfmt, + psb_c_dvector *xh, psb_c_dvector *bh, psb_c_dvector *rh) +{ + psb_i_t iam, np; + psb_l_t ix, iy, iz, el, glob_row; + psb_i_t i, k, info, ret; + double x, y, z, deltah, sqdeltah, deltah2; + double val[10 * NBMAX], zt[NBMAX]; + psb_l_t irow[10 * NBMAX], icol[10 * NBMAX]; + + info = 0; + psb_c_info(cctxt, &iam, &np); + deltah = (double)1.0 / (idim + 1); + sqdeltah = deltah * deltah; + deltah2 = 2.0 * deltah; + psb_c_set_index_base(0); + for (i = 0; i < nl; i++) + { + glob_row = vl[i]; + // if ((i%100000 == 0)||(i<10)) fprintf(stderr,"%d: generation loop at %d %ld \n",iam,i,glob_row); + el = 0; + ix = glob_row / (idim * idim); + iy = (glob_row - ix * idim * idim) / idim; + iz = glob_row - ix * idim * idim - iy * idim; + x = (ix + 1) * deltah; + y = (iy + 1) * deltah; + z = (iz + 1) * deltah; + zt[0] = 0.0; + /* internal point: build discretization */ + /* term depending on (x-1,y,z) */ + val[el] = -a1(x, y, z) / sqdeltah - b1(x, y, z) / deltah2; + if (ix == 0) + { + zt[0] += g(0.0, y, z) * (-val[el]); + } + else + { + icol[el] = (ix - 1) * idim * idim + (iy)*idim + (iz); + el = el + 1; + } + /* term depending on (x,y-1,z) */ + val[el] = -a2(x, y, z) / sqdeltah - b2(x, y, z) / deltah2; + if (iy == 0) + { + zt[0] += g(x, 0.0, z) * (-val[el]); + } + else + { + icol[el] = (ix)*idim * idim + (iy - 1) * idim + (iz); + el = el + 1; + } + /* term depending on (x,y,z-1)*/ + val[el] = -a3(x, y, z) / sqdeltah - b3(x, y, z) / deltah2; + if (iz == 0) + { + zt[0] += g(x, y, 0.0) * (-val[el]); + } + else + { + icol[el] = (ix)*idim * idim + (iy)*idim + (iz - 1); + el = el + 1; + } + /* term depending on (x,y,z)*/ + val[el] = 2.0 * (a1(x, y, z) + a2(x, y, z) + a3(x, y, z)) / sqdeltah + c(x, y, z); + icol[el] = (ix)*idim * idim + (iy)*idim + (iz); + el = el + 1; + /* term depending on (x,y,z+1) */ + val[el] = -a3(x, y, z) / sqdeltah + b3(x, y, z) / deltah2; + if (iz == idim - 1) + { + zt[0] += g(x, y, 1.0) * (-val[el]); + } + else + { + icol[el] = (ix)*idim * idim + (iy)*idim + (iz + 1); + el = el + 1; + } + /* term depending on (x,y+1,z) */ + val[el] = -a2(x, y, z) / sqdeltah + b2(x, y, z) / deltah2; + if (iy == idim - 1) + { + zt[0] += g(x, 1.0, z) * (-val[el]); + } + else + { + icol[el] = (ix)*idim * idim + (iy + 1) * idim + (iz); + el = el + 1; + } + /* term depending on (x+1,y,z) */ + val[el] = -a1(x, y, z) / sqdeltah + b1(x, y, z) / deltah2; + if (ix == idim - 1) + { + zt[0] += g(1.0, y, z) * (-val[el]); + } + else + { + icol[el] = (ix + 1) * idim * idim + (iy)*idim + (iz); + el = el + 1; + } + for (k = 0; k < el; k++) + irow[k] = glob_row; + if ((ret = psb_c_dspins(el, irow, icol, val, ah, cdh)) != 0) + fprintf(stderr, "From psb_c_dspins: %d\n", ret); + irow[0] = glob_row; + psb_c_dgeins(1, irow, zt, bh, cdh); + zt[0] = 0.0; + psb_c_dgeins(1, irow, zt, xh, cdh); + } + +#ifdef PSB_HAVE_CUDA + info = psb_c_cdasb_format(cdh, cdfmt); + if (info != 0) + return (info); + info = psb_c_dspasb_opt(ah, cdh, afmt, psb_upd_def_, psb_dupl_def_); + if (info != 0) + return (info); + info = psb_c_dgeasb_options_format(xh, cdh, psb_dupl_add_, cdfmt); + if (info != 0) + return (info); + info = psb_c_dgeasb_options_format(bh, cdh, psb_dupl_add_, cdfmt); + if (info != 0) + return (info); + info = psb_c_dgeasb_options_format(rh, cdh, psb_dupl_add_, cdfmt); + if (info != 0) + return (info); +#else + if ((info = psb_c_cdasb(cdh)) != 0) + return (info); + + if ((info = psb_c_dspasb(ah, cdh)) != 0) + return (info); + + if ((info = psb_c_dgeasb(xh, cdh)) != 0) + return (info); + if ((info = psb_c_dgeasb(bh, cdh)) != 0) + return (info); + if ((info = psb_c_dgeasb(rh, cdh)) != 0) + return (info); +#endif + return (info); +} + +int main(int argc, char *argv[]) +{ + psb_c_ctxt *cctxt; + psb_i_t iam, np; + char methd[40], ptype[20], afmt[8], cdfmt[8], buffer[LINEBUFSIZE + 1]; + psb_i_t nparms; + psb_i_t idim, info, istop, itmax, itrace, irst, iter, ret; + psb_c_dprec *ph; + psb_c_dspmat *ah; + psb_c_dvector *bh, *xh, *rh; + psb_i_t nb, nlr, nl; + psb_l_t i, ng, *vl, k; + double t1, t2, eps, err; + double *xv, *bv, *rv; + double one = 1.0, zero = 0.0, res2; + psb_c_SolverOptions options; + psb_c_descriptor *cdh; + FILE *vectfile; + + cctxt = psb_c_new_ctxt(); + psb_c_init(cctxt); +#ifdef PSB_HAVE_CUDA + psb_c_cuda_init(cctxt); +#endif + psb_c_info(*cctxt, &iam, &np); +#ifdef PSB_HAVE_CUDA + if (iam == 0) + printf("Example compiled with GPU support.\n"); +#endif + psb_c_barrier(*cctxt); + if (iam == 0) + { + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%d ", &nparms); + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%s", methd); + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%s", ptype); + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%s", afmt); + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%s", cdfmt); + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%d", &idim); + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%d", &istop); + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%d", &itmax); + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%d", &itrace); + fgets(buffer, LINEBUFSIZE, stdin); + sscanf(buffer, "%d", &irst); + } + /* Now broadcast the values, and check they're OK */ + psb_c_ibcast(*cctxt, 1, &nparms, 0); + psb_c_hbcast(*cctxt, methd, 0); + psb_c_hbcast(*cctxt, ptype, 0); + psb_c_hbcast(*cctxt, afmt, 0); + psb_c_hbcast(*cctxt, cdfmt, 0); + psb_c_ibcast(*cctxt, 1, &idim, 0); + psb_c_ibcast(*cctxt, 1, &istop, 0); + psb_c_ibcast(*cctxt, 1, &itmax, 0); + psb_c_ibcast(*cctxt, 1, &itrace, 0); + psb_c_ibcast(*cctxt, 1, &irst, 0); + + fflush(stderr); + psb_c_barrier(*cctxt); + + cdh = psb_c_new_descriptor(); + psb_c_set_index_base(0); + + /* Simple minded BLOCK data distribution */ + ng = ((psb_l_t)idim) * idim * idim; + nb = (ng + np - 1) / np; + nl = nb; + if ((ng - iam * nb) < nl) + nl = ng - iam * nb; + if ((vl = malloc(nb * sizeof(psb_l_t))) == NULL) + { + fprintf(stderr, "On %d: malloc failure\n", iam); + psb_c_abort(*cctxt); + } + i = ((psb_l_t)iam) * nb; + for (k = 0; k < nl; k++) + vl[k] = i + k; + + if ((info = psb_c_cdall_vl(nl, vl, *cctxt, cdh)) != 0) + { + fprintf(stderr, "From cdall: %d\nBailing out\n", info); + psb_c_abort(*cctxt); + } + + bh = psb_c_new_dvector(); + xh = psb_c_new_dvector(); + rh = psb_c_new_dvector(); + ah = psb_c_new_dspmat(); + // fprintf(stderr,"From psb_c_new_dspmat: %p\n",ah); + + /* Allocate mem space for sparse matrix and vectors */ + ret = psb_c_dspall(ah, cdh); + // fprintf(stderr,"From psb_c_dspall: %d\n",ret); + psb_c_dgeall(bh, cdh); + psb_c_dgeall(xh, cdh); + psb_c_dgeall(rh, cdh); + + /* Matrix generation */ + if (matgen(*cctxt, nl, idim, vl, ah, afmt, cdh, cdfmt, xh, bh, rh) != 0) + { + fprintf(stderr, "Error during matrix build loop\n"); + psb_c_abort(*cctxt); + } + psb_c_barrier(*cctxt); + /* Set up the preconditioner */ + ph = psb_c_new_dprec(); + psb_c_dprecinit(*cctxt, ph, ptype); + ret = psb_c_dprecbld(ah, cdh, ph); + // fprintf(stderr,"From psb_c_dprecbld: %d\n",ret); + + /* Set up the solver options */ + psb_c_DefaultSolverOptions(&options); + options.eps = 1.e-9; + options.itmax = itmax; + options.irst = irst; + options.itrace = 1; + options.istop = istop; + psb_c_seterraction_ret(); + t1 = psb_c_wtime(); + ret = psb_c_dkrylov(methd, ah, ph, bh, xh, cdh, &options); + t2 = psb_c_wtime(); + iter = options.iter; + err = options.err; + // fprintf(stderr,"From krylov: %d %lf, %d %d\n",iter,err,ret,psb_c_get_errstatus()); + if (psb_c_get_errstatus() != 0) + { + psb_c_print_errmsg(); + } + // fprintf(stderr,"After cleanup %d\n",psb_c_get_errstatus()); + /* Check 2-norm of residual on exit */ + psb_c_dgeaxpby(one, bh, zero, rh, cdh); + psb_c_dspmm(-one, ah, xh, one, rh, cdh); + res2 = psb_c_dgenrm2(rh, cdh); + + if (iam == 0) + { + fprintf(stdout, "Time: %lf\n", (t2 - t1)); + fprintf(stdout, "Iter: %d\n", iter); + fprintf(stdout, "Err: %lg\n", err); + fprintf(stdout, "||r||_2: %lg\n", res2); + } + +#if DUMPATRIX + psb_c_dmat_name_print(ah, "cbindmat.mtx"); + nlr = psb_c_cd_get_local_rows(cdh); + bv = psb_c_dvect_get_cpy(bh); + vectfile = fopen("cbindb.mtx", "w"); + for (i = 0; i < nlr; i++) + fprintf(vectfile, "%lf\n", bv[i]); + fclose(vectfile); + + xv = psb_c_dvect_get_cpy(xh); + nlr = psb_c_cd_get_local_rows(cdh); + for (i = 0; i < nlr; i++) + fprintf(stdout, "SOL: %d %d %lf\n", iam, i, xv[i]); + + rv = psb_c_dvect_get_cpy(rh); + nlr = psb_c_cd_get_local_rows(cdh); + for (i = 0; i < nlr; i++) + fprintf(stdout, "RES: %d %d %lf\n", iam, i, rv[i]); + +#endif + + /* Clean up memory */ + if ((info = psb_c_dprecfree(ph)) != 0) + { + fprintf(stderr, "From dprecfree: %d\nBailing out\n", info); + psb_c_abort(*cctxt); + } + if ((info = psb_c_dgefree(xh, cdh)) != 0) + { + fprintf(stderr, "From dgefree: %d\nBailing out\n", info); + psb_c_abort(*cctxt); + } + if ((info = psb_c_dgefree(bh, cdh)) != 0) + { + fprintf(stderr, "From dgefree: %d\nBailing out\n", info); + psb_c_abort(*cctxt); + } + if ((info = psb_c_dgefree(rh, cdh)) != 0) + { + fprintf(stderr, "From dgefree: %d\nBailing out\n", info); + psb_c_abort(*cctxt); + } + if ((info = psb_c_dspfree(ah, cdh)) != 0) + { + fprintf(stderr, "From dspfree: %d\nBailing out\n", info); + psb_c_abort(*cctxt); + } + + if ((info = psb_c_cdfree(cdh)) != 0) + { + fprintf(stderr, "From cdfree: %d\nBailing out\n", info); + psb_c_abort(*cctxt); + } + // fprintf(stderr,"pointer from cdfree: %p\n",cdh->descriptor); + + /* Clean up object handles */ + free(ph); + free(xh); + free(bh); + free(rh); + free(ah); + free(cdh); + + /* further cleanup */ + free(vl); + // if (iam == 0) fprintf(stderr,"program completed successfully\n"); + + psb_c_barrier(*cctxt); +#ifdef PSB_HAVE_CUDA + psb_c_cuda_exit(); +#endif + /* Finalize PSBLAS context */ + psb_c_exit(*cctxt); + free(cctxt); +} diff --git a/cbind/test/gpu/runs/pdegen3d.inp b/cbind/test/gpu/runs/pdegen3d.inp new file mode 100644 index 00000000..c2bb3071 --- /dev/null +++ b/cbind/test/gpu/runs/pdegen3d.inp @@ -0,0 +1,12 @@ +7 Number of entries below this +BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES +BJAC ls Preconditioner NONE DIAG BJAC +CSRG A Storage format CSR COO (for CPU) CSRG HLG (for GPU) +DEVICE A Descriptor Storage format HOST/CPU or DEVICE/GPU +080 Domain size (acutal system is this**3) +1 Stopping criterion +80 MAXIT +01 ITRACE +20 IRST restart for RGMRES and BiCGSTABL + +