diff --git a/cuda/Makefile b/cuda/Makefile index 2b0c011a..0f03e359 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -23,22 +23,22 @@ FOBJS=cusparse_mod.o base_cusparse_mod.o \ psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\ psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \ elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \ - psb_i_gpu_vect_mod.o \ - psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ - psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ - psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ - psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ - psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ - psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ - psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ - psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ - psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ - psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ - psb_gpu_env_mod.o psb_gpu_mod.o \ - psb_d_diag_mat_mod.o\ - psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ - psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ - psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o \ + psb_i_cuda_vect_mod.o \ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_cuda_env_mod.o psb_cuda_mod.o \ + psb_d_cuda_diag_mat_mod.o\ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o \ dnsdev_mod.o COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\ @@ -65,30 +65,30 @@ lib: ilib cudalib spgpulib /bin/cp -p $(LIBNAME) $(LIBDIR) dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o -psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o: psb_i_gpu_vect_mod.o -psb_i_gpu_vect_mod.o : psb_vectordev_mod.o psb_gpu_env_mod.o +psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o +psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o -psb_d_hlg_mat_mod.o: hlldev_mod.o psb_d_gpu_vect_mod.o psb_gpu_env_mod.o -psb_d_elg_mat_mod.o: elldev_mod.o psb_d_gpu_vect_mod.o -psb_d_diag_mat_mod.o: diagdev_mod.o psb_d_gpu_vect_mod.o -psb_d_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_gpu_vect_mod.o -psb_s_dnsg_mat_mod.o: dnsdev_mod.o psb_s_gpu_vect_mod.o -psb_d_dnsg_mat_mod.o: dnsdev_mod.o psb_d_gpu_vect_mod.o -psb_c_dnsg_mat_mod.o: dnsdev_mod.o psb_c_gpu_vect_mod.o -psb_z_dnsg_mat_mod.o: dnsdev_mod.o psb_z_gpu_vect_mod.o -psb_s_hlg_mat_mod.o: hlldev_mod.o psb_s_gpu_vect_mod.o psb_gpu_env_mod.o -psb_s_elg_mat_mod.o: elldev_mod.o psb_s_gpu_vect_mod.o -psb_s_diag_mat_mod.o: diagdev_mod.o psb_s_gpu_vect_mod.o -psb_s_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_gpu_vect_mod.o -psb_s_csrg_mat_mod.o psb_s_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o -psb_d_csrg_mat_mod.o psb_d_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o -psb_z_hlg_mat_mod.o: hlldev_mod.o psb_z_gpu_vect_mod.o psb_gpu_env_mod.o -psb_z_elg_mat_mod.o: elldev_mod.o psb_z_gpu_vect_mod.o -psb_c_hlg_mat_mod.o: hlldev_mod.o psb_c_gpu_vect_mod.o psb_gpu_env_mod.o -psb_c_elg_mat_mod.o: elldev_mod.o psb_c_gpu_vect_mod.o -psb_c_csrg_mat_mod.o psb_c_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o -psb_z_csrg_mat_mod.o psb_z_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_cuda_hlg_mat_mod.o: hlldev_mod.o psb_d_cuda_vect_mod.o psb_cuda_env_mod.o +psb_d_cuda_elg_mat_mod.o: elldev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_diag_mat_mod.o: diagdev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_cuda_vect_mod.o +psb_s_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_s_cuda_vect_mod.o +psb_d_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_d_cuda_vect_mod.o +psb_c_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_c_cuda_vect_mod.o +psb_z_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_z_cuda_vect_mod.o +psb_s_cuda_hlg_mat_mod.o: hlldev_mod.o psb_s_cuda_vect_mod.o psb_cuda_env_mod.o +psb_s_cuda_elg_mat_mod.o: elldev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_diag_mat_mod.o: diagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_csrg_mat_mod.o psb_s_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_cuda_csrg_mat_mod.o psb_d_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_hlg_mat_mod.o: hlldev_mod.o psb_z_cuda_vect_mod.o psb_cuda_env_mod.o +psb_z_cuda_elg_mat_mod.o: elldev_mod.o psb_z_cuda_vect_mod.o +psb_c_cuda_hlg_mat_mod.o: hlldev_mod.o psb_c_cuda_vect_mod.o psb_cuda_env_mod.o +psb_c_cuda_elg_mat_mod.o: elldev_mod.o psb_c_cuda_vect_mod.o +psb_c_cuda_csrg_mat_mod.o psb_c_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_csrg_mat_mod.o psb_z_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o vectordev.o: cuda_util.o vectordev.h @@ -101,22 +101,22 @@ svectordev.o: svectordev.h vectordev.h dvectordev.o: dvectordev.h vectordev.h cvectordev.o: cvectordev.h vectordev.h zvectordev.o: zvectordev.h vectordev.h -psb_gpu_env_mod.o: base_cusparse_mod.o -psb_gpu_mod.o: psb_gpu_env_mod.o psb_i_gpu_vect_mod.o\ - psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ - psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ - psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ - psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ - psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ - psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ - psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ - psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ - psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ - psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ - psb_d_diag_mat_mod.o \ - psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ - psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ - psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o +psb_cuda_env_mod.o: base_cusparse_mod.o +psb_cuda_mod.o: psb_cuda_env_mod.o psb_i_cuda_vect_mod.o\ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_d_cuda_diag_mat_mod.o \ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o iobjs: $(FOBJS) $(MAKE) -C impl objs diff --git a/cuda/impl/Makefile b/cuda/impl/Makefile index 6ddac9a7..12bf0747 100755 --- a/cuda/impl/Makefile +++ b/cuda/impl/Makefile @@ -17,276 +17,276 @@ CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) OBJS= \ -psb_d_cp_csrg_from_coo.o \ -psb_d_cp_csrg_from_fmt.o \ -psb_d_cp_elg_from_coo.o \ -psb_d_cp_elg_from_fmt.o \ -psb_s_cp_csrg_from_coo.o \ -psb_s_cp_csrg_from_fmt.o \ -psb_s_csrg_allocate_mnnz.o \ -psb_s_csrg_csmm.o \ -psb_s_csrg_csmv.o \ -psb_s_csrg_mold.o \ -psb_s_csrg_reallocate_nz.o \ -psb_s_csrg_scal.o \ -psb_s_csrg_scals.o \ -psb_s_csrg_from_gpu.o \ -psb_s_csrg_to_gpu.o \ -psb_s_csrg_vect_mv.o \ -psb_s_csrg_inner_vect_sv.o \ -psb_d_csrg_allocate_mnnz.o \ -psb_d_csrg_csmm.o \ -psb_d_csrg_csmv.o \ -psb_d_csrg_mold.o \ -psb_d_csrg_reallocate_nz.o \ -psb_d_csrg_scal.o \ -psb_d_csrg_scals.o \ -psb_d_csrg_from_gpu.o \ -psb_d_csrg_to_gpu.o \ -psb_d_csrg_vect_mv.o \ -psb_d_csrg_inner_vect_sv.o \ -psb_d_elg_allocate_mnnz.o \ -psb_d_elg_asb.o \ -psb_d_elg_csmm.o \ -psb_d_elg_csmv.o \ -psb_d_elg_csput.o \ -psb_d_elg_from_gpu.o \ -psb_d_elg_inner_vect_sv.o \ -psb_d_elg_mold.o \ -psb_d_elg_reallocate_nz.o \ -psb_d_elg_scal.o \ -psb_d_elg_scals.o \ -psb_d_elg_to_gpu.o \ -psb_d_elg_vect_mv.o \ -psb_d_mv_csrg_from_coo.o \ -psb_d_mv_csrg_from_fmt.o \ -psb_d_mv_elg_from_coo.o \ -psb_d_mv_elg_from_fmt.o \ -psb_s_mv_csrg_from_coo.o \ -psb_s_mv_csrg_from_fmt.o \ -psb_s_cp_elg_from_coo.o \ -psb_s_cp_elg_from_fmt.o \ -psb_s_elg_allocate_mnnz.o \ -psb_s_elg_asb.o \ -psb_s_elg_csmm.o \ -psb_s_elg_csmv.o \ -psb_s_elg_csput.o \ -psb_s_elg_inner_vect_sv.o \ -psb_s_elg_mold.o \ -psb_s_elg_reallocate_nz.o \ -psb_s_elg_scal.o \ -psb_s_elg_scals.o \ -psb_s_elg_to_gpu.o \ -psb_s_elg_from_gpu.o \ -psb_s_elg_vect_mv.o \ -psb_s_mv_elg_from_coo.o \ -psb_s_mv_elg_from_fmt.o \ -psb_s_cp_hlg_from_fmt.o \ -psb_s_cp_hlg_from_coo.o \ -psb_d_cp_hlg_from_fmt.o \ -psb_d_cp_hlg_from_coo.o \ -psb_d_hlg_allocate_mnnz.o \ -psb_d_hlg_csmm.o \ -psb_d_hlg_csmv.o \ -psb_d_hlg_inner_vect_sv.o \ -psb_d_hlg_mold.o \ -psb_d_hlg_reallocate_nz.o \ -psb_d_hlg_scal.o \ -psb_d_hlg_scals.o \ -psb_d_hlg_from_gpu.o \ -psb_d_hlg_to_gpu.o \ -psb_d_hlg_vect_mv.o \ -psb_s_hlg_allocate_mnnz.o \ -psb_s_hlg_csmm.o \ -psb_s_hlg_csmv.o \ -psb_s_hlg_inner_vect_sv.o \ -psb_s_hlg_mold.o \ -psb_s_hlg_reallocate_nz.o \ -psb_s_hlg_scal.o \ -psb_s_hlg_scals.o \ -psb_s_hlg_from_gpu.o \ -psb_s_hlg_to_gpu.o \ -psb_s_hlg_vect_mv.o \ -psb_s_mv_hlg_from_coo.o \ -psb_s_cp_hlg_from_coo.o \ -psb_s_mv_hlg_from_fmt.o \ -psb_d_mv_hlg_from_coo.o \ -psb_d_cp_hlg_from_coo.o \ -psb_d_mv_hlg_from_fmt.o \ -psb_s_hybg_allocate_mnnz.o \ -psb_s_hybg_csmm.o \ -psb_s_hybg_csmv.o \ -psb_s_hybg_reallocate_nz.o \ -psb_s_hybg_scal.o \ -psb_s_hybg_scals.o \ -psb_s_hybg_to_gpu.o \ -psb_s_hybg_vect_mv.o \ -psb_s_hybg_inner_vect_sv.o \ -psb_s_cp_hybg_from_coo.o \ -psb_s_cp_hybg_from_fmt.o \ -psb_s_mv_hybg_from_fmt.o \ -psb_s_mv_hybg_from_coo.o \ -psb_s_hybg_mold.o \ -psb_d_hybg_allocate_mnnz.o \ -psb_d_hybg_csmm.o \ -psb_d_hybg_csmv.o \ -psb_d_hybg_reallocate_nz.o \ -psb_d_hybg_scal.o \ -psb_d_hybg_scals.o \ -psb_d_hybg_to_gpu.o \ -psb_d_hybg_vect_mv.o \ -psb_d_hybg_inner_vect_sv.o \ -psb_d_cp_hybg_from_coo.o \ -psb_d_cp_hybg_from_fmt.o \ -psb_d_mv_hybg_from_fmt.o \ -psb_d_mv_hybg_from_coo.o \ -psb_d_hybg_mold.o \ -psb_z_cp_csrg_from_coo.o \ -psb_z_cp_csrg_from_fmt.o \ -psb_z_cp_elg_from_coo.o \ -psb_z_cp_elg_from_fmt.o \ -psb_c_cp_csrg_from_coo.o \ -psb_c_cp_csrg_from_fmt.o \ -psb_c_csrg_allocate_mnnz.o \ -psb_c_csrg_csmm.o \ -psb_c_csrg_csmv.o \ -psb_c_csrg_mold.o \ -psb_c_csrg_reallocate_nz.o \ -psb_c_csrg_scal.o \ -psb_c_csrg_scals.o \ -psb_c_csrg_from_gpu.o \ -psb_c_csrg_to_gpu.o \ -psb_c_csrg_vect_mv.o \ -psb_c_csrg_inner_vect_sv.o \ -psb_z_csrg_allocate_mnnz.o \ -psb_z_csrg_csmm.o \ -psb_z_csrg_csmv.o \ -psb_z_csrg_mold.o \ -psb_z_csrg_reallocate_nz.o \ -psb_z_csrg_scal.o \ -psb_z_csrg_scals.o \ -psb_z_csrg_from_gpu.o \ -psb_z_csrg_to_gpu.o \ -psb_z_csrg_vect_mv.o \ -psb_z_csrg_inner_vect_sv.o \ -psb_z_elg_allocate_mnnz.o \ -psb_z_elg_asb.o \ -psb_z_elg_csmm.o \ -psb_z_elg_csmv.o \ -psb_z_elg_csput.o \ -psb_z_elg_inner_vect_sv.o \ -psb_z_elg_mold.o \ -psb_z_elg_reallocate_nz.o \ -psb_z_elg_scal.o \ -psb_z_elg_scals.o \ -psb_z_elg_to_gpu.o \ -psb_z_elg_from_gpu.o \ -psb_z_elg_vect_mv.o \ -psb_z_mv_csrg_from_coo.o \ -psb_z_mv_csrg_from_fmt.o \ -psb_z_mv_elg_from_coo.o \ -psb_z_mv_elg_from_fmt.o \ -psb_c_mv_csrg_from_coo.o \ -psb_c_mv_csrg_from_fmt.o \ -psb_c_cp_elg_from_coo.o \ -psb_c_cp_elg_from_fmt.o \ -psb_c_elg_allocate_mnnz.o \ -psb_c_elg_asb.o \ -psb_c_elg_csmm.o \ -psb_c_elg_csmv.o \ -psb_c_elg_csput.o \ -psb_c_elg_inner_vect_sv.o \ -psb_c_elg_mold.o \ -psb_c_elg_reallocate_nz.o \ -psb_c_elg_scal.o \ -psb_c_elg_scals.o \ -psb_c_elg_to_gpu.o \ -psb_c_elg_from_gpu.o \ -psb_c_elg_vect_mv.o \ -psb_c_mv_elg_from_coo.o \ -psb_c_mv_elg_from_fmt.o \ -psb_c_cp_hlg_from_fmt.o \ -psb_c_cp_hlg_from_coo.o \ -psb_z_cp_hlg_from_fmt.o \ -psb_z_cp_hlg_from_coo.o \ -psb_z_hlg_allocate_mnnz.o \ -psb_z_hlg_csmm.o \ -psb_z_hlg_csmv.o \ -psb_z_hlg_inner_vect_sv.o \ -psb_z_hlg_mold.o \ -psb_z_hlg_reallocate_nz.o \ -psb_z_hlg_scal.o \ -psb_z_hlg_scals.o \ -psb_z_hlg_from_gpu.o \ -psb_z_hlg_to_gpu.o \ -psb_z_hlg_vect_mv.o \ -psb_c_hlg_allocate_mnnz.o \ -psb_c_hlg_csmm.o \ -psb_c_hlg_csmv.o \ -psb_c_hlg_inner_vect_sv.o \ -psb_c_hlg_mold.o \ -psb_c_hlg_reallocate_nz.o \ -psb_c_hlg_scal.o \ -psb_c_hlg_scals.o \ -psb_c_hlg_from_gpu.o \ -psb_c_hlg_to_gpu.o \ -psb_c_hlg_vect_mv.o \ -psb_c_mv_hlg_from_coo.o \ -psb_c_cp_hlg_from_coo.o \ -psb_c_mv_hlg_from_fmt.o \ -psb_z_mv_hlg_from_coo.o \ -psb_z_cp_hlg_from_coo.o \ -psb_z_mv_hlg_from_fmt.o \ -psb_c_hybg_allocate_mnnz.o \ -psb_c_hybg_csmm.o \ -psb_c_hybg_csmv.o \ -psb_c_hybg_reallocate_nz.o \ -psb_c_hybg_scal.o \ -psb_c_hybg_scals.o \ -psb_c_hybg_to_gpu.o \ -psb_c_hybg_vect_mv.o \ -psb_c_hybg_inner_vect_sv.o \ -psb_c_cp_hybg_from_coo.o \ -psb_c_cp_hybg_from_fmt.o \ -psb_c_mv_hybg_from_fmt.o \ -psb_c_mv_hybg_from_coo.o \ -psb_c_hybg_mold.o \ -psb_z_hybg_allocate_mnnz.o \ -psb_z_hybg_csmm.o \ -psb_z_hybg_csmv.o \ -psb_z_hybg_reallocate_nz.o \ -psb_z_hybg_scal.o \ -psb_z_hybg_scals.o \ -psb_z_hybg_to_gpu.o \ -psb_z_hybg_vect_mv.o \ -psb_z_hybg_inner_vect_sv.o \ -psb_z_cp_hybg_from_coo.o \ -psb_z_cp_hybg_from_fmt.o \ -psb_z_mv_hybg_from_fmt.o \ -psb_z_mv_hybg_from_coo.o \ -psb_z_hybg_mold.o \ -psb_d_cp_diag_from_coo.o \ -psb_d_mv_diag_from_coo.o \ -psb_d_diag_to_gpu.o \ -psb_d_diag_csmv.o \ -psb_d_diag_mold.o \ -psb_d_diag_vect_mv.o \ -psb_d_cp_hdiag_from_coo.o \ -psb_d_mv_hdiag_from_coo.o \ -psb_d_hdiag_to_gpu.o \ -psb_d_hdiag_csmv.o \ -psb_d_hdiag_mold.o \ -psb_d_hdiag_vect_mv.o \ -psb_s_cp_hdiag_from_coo.o \ -psb_s_mv_hdiag_from_coo.o \ -psb_s_hdiag_to_gpu.o \ -psb_s_hdiag_csmv.o \ -psb_s_hdiag_mold.o \ -psb_s_hdiag_vect_mv.o \ -psb_s_dnsg_mat_impl.o \ -psb_d_dnsg_mat_impl.o \ -psb_c_dnsg_mat_impl.o \ -psb_z_dnsg_mat_impl.o +psb_d_cuda_cp_csrg_from_coo.o \ +psb_d_cuda_cp_csrg_from_fmt.o \ +psb_d_cuda_cp_elg_from_coo.o \ +psb_d_cuda_cp_elg_from_fmt.o \ +psb_s_cuda_cp_csrg_from_coo.o \ +psb_s_cuda_cp_csrg_from_fmt.o \ +psb_s_cuda_csrg_allocate_mnnz.o \ +psb_s_cuda_csrg_csmm.o \ +psb_s_cuda_csrg_csmv.o \ +psb_s_cuda_csrg_mold.o \ +psb_s_cuda_csrg_reallocate_nz.o \ +psb_s_cuda_csrg_scal.o \ +psb_s_cuda_csrg_scals.o \ +psb_s_cuda_csrg_from_gpu.o \ +psb_s_cuda_csrg_to_gpu.o \ +psb_s_cuda_csrg_vect_mv.o \ +psb_s_cuda_csrg_inner_vect_sv.o \ +psb_d_cuda_csrg_allocate_mnnz.o \ +psb_d_cuda_csrg_csmm.o \ +psb_d_cuda_csrg_csmv.o \ +psb_d_cuda_csrg_mold.o \ +psb_d_cuda_csrg_reallocate_nz.o \ +psb_d_cuda_csrg_scal.o \ +psb_d_cuda_csrg_scals.o \ +psb_d_cuda_csrg_from_gpu.o \ +psb_d_cuda_csrg_to_gpu.o \ +psb_d_cuda_csrg_vect_mv.o \ +psb_d_cuda_csrg_inner_vect_sv.o \ +psb_d_cuda_elg_allocate_mnnz.o \ +psb_d_cuda_elg_asb.o \ +psb_d_cuda_elg_csmm.o \ +psb_d_cuda_elg_csmv.o \ +psb_d_cuda_elg_csput.o \ +psb_d_cuda_elg_from_gpu.o \ +psb_d_cuda_elg_inner_vect_sv.o \ +psb_d_cuda_elg_mold.o \ +psb_d_cuda_elg_reallocate_nz.o \ +psb_d_cuda_elg_scal.o \ +psb_d_cuda_elg_scals.o \ +psb_d_cuda_elg_to_gpu.o \ +psb_d_cuda_elg_vect_mv.o \ +psb_d_cuda_mv_csrg_from_coo.o \ +psb_d_cuda_mv_csrg_from_fmt.o \ +psb_d_cuda_mv_elg_from_coo.o \ +psb_d_cuda_mv_elg_from_fmt.o \ +psb_s_cuda_mv_csrg_from_coo.o \ +psb_s_cuda_mv_csrg_from_fmt.o \ +psb_s_cuda_cp_elg_from_coo.o \ +psb_s_cuda_cp_elg_from_fmt.o \ +psb_s_cuda_elg_allocate_mnnz.o \ +psb_s_cuda_elg_asb.o \ +psb_s_cuda_elg_csmm.o \ +psb_s_cuda_elg_csmv.o \ +psb_s_cuda_elg_csput.o \ +psb_s_cuda_elg_inner_vect_sv.o \ +psb_s_cuda_elg_mold.o \ +psb_s_cuda_elg_reallocate_nz.o \ +psb_s_cuda_elg_scal.o \ +psb_s_cuda_elg_scals.o \ +psb_s_cuda_elg_to_gpu.o \ +psb_s_cuda_elg_from_gpu.o \ +psb_s_cuda_elg_vect_mv.o \ +psb_s_cuda_mv_elg_from_coo.o \ +psb_s_cuda_mv_elg_from_fmt.o \ +psb_s_cuda_cp_hlg_from_fmt.o \ +psb_s_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_cp_hlg_from_fmt.o \ +psb_d_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_hlg_allocate_mnnz.o \ +psb_d_cuda_hlg_csmm.o \ +psb_d_cuda_hlg_csmv.o \ +psb_d_cuda_hlg_inner_vect_sv.o \ +psb_d_cuda_hlg_mold.o \ +psb_d_cuda_hlg_reallocate_nz.o \ +psb_d_cuda_hlg_scal.o \ +psb_d_cuda_hlg_scals.o \ +psb_d_cuda_hlg_from_gpu.o \ +psb_d_cuda_hlg_to_gpu.o \ +psb_d_cuda_hlg_vect_mv.o \ +psb_s_cuda_hlg_allocate_mnnz.o \ +psb_s_cuda_hlg_csmm.o \ +psb_s_cuda_hlg_csmv.o \ +psb_s_cuda_hlg_inner_vect_sv.o \ +psb_s_cuda_hlg_mold.o \ +psb_s_cuda_hlg_reallocate_nz.o \ +psb_s_cuda_hlg_scal.o \ +psb_s_cuda_hlg_scals.o \ +psb_s_cuda_hlg_from_gpu.o \ +psb_s_cuda_hlg_to_gpu.o \ +psb_s_cuda_hlg_vect_mv.o \ +psb_s_cuda_mv_hlg_from_coo.o \ +psb_s_cuda_cp_hlg_from_coo.o \ +psb_s_cuda_mv_hlg_from_fmt.o \ +psb_d_cuda_mv_hlg_from_coo.o \ +psb_d_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_mv_hlg_from_fmt.o \ +psb_s_cuda_hybg_allocate_mnnz.o \ +psb_s_cuda_hybg_csmm.o \ +psb_s_cuda_hybg_csmv.o \ +psb_s_cuda_hybg_reallocate_nz.o \ +psb_s_cuda_hybg_scal.o \ +psb_s_cuda_hybg_scals.o \ +psb_s_cuda_hybg_to_gpu.o \ +psb_s_cuda_hybg_vect_mv.o \ +psb_s_cuda_hybg_inner_vect_sv.o \ +psb_s_cuda_cp_hybg_from_coo.o \ +psb_s_cuda_cp_hybg_from_fmt.o \ +psb_s_cuda_mv_hybg_from_fmt.o \ +psb_s_cuda_mv_hybg_from_coo.o \ +psb_s_cuda_hybg_mold.o \ +psb_d_cuda_hybg_allocate_mnnz.o \ +psb_d_cuda_hybg_csmm.o \ +psb_d_cuda_hybg_csmv.o \ +psb_d_cuda_hybg_reallocate_nz.o \ +psb_d_cuda_hybg_scal.o \ +psb_d_cuda_hybg_scals.o \ +psb_d_cuda_hybg_to_gpu.o \ +psb_d_cuda_hybg_vect_mv.o \ +psb_d_cuda_hybg_inner_vect_sv.o \ +psb_d_cuda_cp_hybg_from_coo.o \ +psb_d_cuda_cp_hybg_from_fmt.o \ +psb_d_cuda_mv_hybg_from_fmt.o \ +psb_d_cuda_mv_hybg_from_coo.o \ +psb_d_cuda_hybg_mold.o \ +psb_z_cuda_cp_csrg_from_coo.o \ +psb_z_cuda_cp_csrg_from_fmt.o \ +psb_z_cuda_cp_elg_from_coo.o \ +psb_z_cuda_cp_elg_from_fmt.o \ +psb_c_cuda_cp_csrg_from_coo.o \ +psb_c_cuda_cp_csrg_from_fmt.o \ +psb_c_cuda_csrg_allocate_mnnz.o \ +psb_c_cuda_csrg_csmm.o \ +psb_c_cuda_csrg_csmv.o \ +psb_c_cuda_csrg_mold.o \ +psb_c_cuda_csrg_reallocate_nz.o \ +psb_c_cuda_csrg_scal.o \ +psb_c_cuda_csrg_scals.o \ +psb_c_cuda_csrg_from_gpu.o \ +psb_c_cuda_csrg_to_gpu.o \ +psb_c_cuda_csrg_vect_mv.o \ +psb_c_cuda_csrg_inner_vect_sv.o \ +psb_z_cuda_csrg_allocate_mnnz.o \ +psb_z_cuda_csrg_csmm.o \ +psb_z_cuda_csrg_csmv.o \ +psb_z_cuda_csrg_mold.o \ +psb_z_cuda_csrg_reallocate_nz.o \ +psb_z_cuda_csrg_scal.o \ +psb_z_cuda_csrg_scals.o \ +psb_z_cuda_csrg_from_gpu.o \ +psb_z_cuda_csrg_to_gpu.o \ +psb_z_cuda_csrg_vect_mv.o \ +psb_z_cuda_csrg_inner_vect_sv.o \ +psb_z_cuda_elg_allocate_mnnz.o \ +psb_z_cuda_elg_asb.o \ +psb_z_cuda_elg_csmm.o \ +psb_z_cuda_elg_csmv.o \ +psb_z_cuda_elg_csput.o \ +psb_z_cuda_elg_inner_vect_sv.o \ +psb_z_cuda_elg_mold.o \ +psb_z_cuda_elg_reallocate_nz.o \ +psb_z_cuda_elg_scal.o \ +psb_z_cuda_elg_scals.o \ +psb_z_cuda_elg_to_gpu.o \ +psb_z_cuda_elg_from_gpu.o \ +psb_z_cuda_elg_vect_mv.o \ +psb_z_cuda_mv_csrg_from_coo.o \ +psb_z_cuda_mv_csrg_from_fmt.o \ +psb_z_cuda_mv_elg_from_coo.o \ +psb_z_cuda_mv_elg_from_fmt.o \ +psb_c_cuda_mv_csrg_from_coo.o \ +psb_c_cuda_mv_csrg_from_fmt.o \ +psb_c_cuda_cp_elg_from_coo.o \ +psb_c_cuda_cp_elg_from_fmt.o \ +psb_c_cuda_elg_allocate_mnnz.o \ +psb_c_cuda_elg_asb.o \ +psb_c_cuda_elg_csmm.o \ +psb_c_cuda_elg_csmv.o \ +psb_c_cuda_elg_csput.o \ +psb_c_cuda_elg_inner_vect_sv.o \ +psb_c_cuda_elg_mold.o \ +psb_c_cuda_elg_reallocate_nz.o \ +psb_c_cuda_elg_scal.o \ +psb_c_cuda_elg_scals.o \ +psb_c_cuda_elg_to_gpu.o \ +psb_c_cuda_elg_from_gpu.o \ +psb_c_cuda_elg_vect_mv.o \ +psb_c_cuda_mv_elg_from_coo.o \ +psb_c_cuda_mv_elg_from_fmt.o \ +psb_c_cuda_cp_hlg_from_fmt.o \ +psb_c_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_cp_hlg_from_fmt.o \ +psb_z_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_hlg_allocate_mnnz.o \ +psb_z_cuda_hlg_csmm.o \ +psb_z_cuda_hlg_csmv.o \ +psb_z_cuda_hlg_inner_vect_sv.o \ +psb_z_cuda_hlg_mold.o \ +psb_z_cuda_hlg_reallocate_nz.o \ +psb_z_cuda_hlg_scal.o \ +psb_z_cuda_hlg_scals.o \ +psb_z_cuda_hlg_from_gpu.o \ +psb_z_cuda_hlg_to_gpu.o \ +psb_z_cuda_hlg_vect_mv.o \ +psb_c_cuda_hlg_allocate_mnnz.o \ +psb_c_cuda_hlg_csmm.o \ +psb_c_cuda_hlg_csmv.o \ +psb_c_cuda_hlg_inner_vect_sv.o \ +psb_c_cuda_hlg_mold.o \ +psb_c_cuda_hlg_reallocate_nz.o \ +psb_c_cuda_hlg_scal.o \ +psb_c_cuda_hlg_scals.o \ +psb_c_cuda_hlg_from_gpu.o \ +psb_c_cuda_hlg_to_gpu.o \ +psb_c_cuda_hlg_vect_mv.o \ +psb_c_cuda_mv_hlg_from_coo.o \ +psb_c_cuda_cp_hlg_from_coo.o \ +psb_c_cuda_mv_hlg_from_fmt.o \ +psb_z_cuda_mv_hlg_from_coo.o \ +psb_z_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_mv_hlg_from_fmt.o \ +psb_c_cuda_hybg_allocate_mnnz.o \ +psb_c_cuda_hybg_csmm.o \ +psb_c_cuda_hybg_csmv.o \ +psb_c_cuda_hybg_reallocate_nz.o \ +psb_c_cuda_hybg_scal.o \ +psb_c_cuda_hybg_scals.o \ +psb_c_cuda_hybg_to_gpu.o \ +psb_c_cuda_hybg_vect_mv.o \ +psb_c_cuda_hybg_inner_vect_sv.o \ +psb_c_cuda_cp_hybg_from_coo.o \ +psb_c_cuda_cp_hybg_from_fmt.o \ +psb_c_cuda_mv_hybg_from_fmt.o \ +psb_c_cuda_mv_hybg_from_coo.o \ +psb_c_cuda_hybg_mold.o \ +psb_z_cuda_hybg_allocate_mnnz.o \ +psb_z_cuda_hybg_csmm.o \ +psb_z_cuda_hybg_csmv.o \ +psb_z_cuda_hybg_reallocate_nz.o \ +psb_z_cuda_hybg_scal.o \ +psb_z_cuda_hybg_scals.o \ +psb_z_cuda_hybg_to_gpu.o \ +psb_z_cuda_hybg_vect_mv.o \ +psb_z_cuda_hybg_inner_vect_sv.o \ +psb_z_cuda_cp_hybg_from_coo.o \ +psb_z_cuda_cp_hybg_from_fmt.o \ +psb_z_cuda_mv_hybg_from_fmt.o \ +psb_z_cuda_mv_hybg_from_coo.o \ +psb_z_cuda_hybg_mold.o \ +psb_d_cuda_cp_diag_from_coo.o \ +psb_d_cuda_mv_diag_from_coo.o \ +psb_d_cuda_diag_to_gpu.o \ +psb_d_cuda_diag_csmv.o \ +psb_d_cuda_diag_mold.o \ +psb_d_cuda_diag_vect_mv.o \ +psb_d_cuda_cp_hdiag_from_coo.o \ +psb_d_cuda_mv_hdiag_from_coo.o \ +psb_d_cuda_hdiag_to_gpu.o \ +psb_d_cuda_hdiag_csmv.o \ +psb_d_cuda_hdiag_mold.o \ +psb_d_cuda_hdiag_vect_mv.o \ +psb_s_cuda_cp_hdiag_from_coo.o \ +psb_s_cuda_mv_hdiag_from_coo.o \ +psb_s_cuda_hdiag_to_gpu.o \ +psb_s_cuda_hdiag_csmv.o \ +psb_s_cuda_hdiag_mold.o \ +psb_s_cuda_hdiag_vect_mv.o \ +psb_s_cuda_dnsg_mat_impl.o \ +psb_d_cuda_dnsg_mat_impl.o \ +psb_c_cuda_dnsg_mat_impl.o \ +psb_z_cuda_dnsg_mat_impl.o objs: $(OBJS) diff --git a/cuda/impl/psb_c_cp_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_cp_csrg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 index 9ab3b7f0..af3301ff 100644 --- a/cuda/impl/psb_c_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_cp_csrg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_coo + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_c_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_csrg_from_coo +end subroutine psb_c_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_c_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 index 5229244f..47845e52 100644 --- a/cuda/impl/psb_c_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_cp_csrg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_fmt + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_c_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_c_cp_csrg_from_fmt +end subroutine psb_c_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_c_cp_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_cp_diag_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 index 8d196891..5b1eb817 100644 --- a/cuda/impl/psb_c_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_cp_diag_from_coo(a,b,info) +subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_cp_diag_from_coo + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif implicit none - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_c_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_diag_from_coo +end subroutine psb_c_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_c_cp_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_c_cp_elg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 index 95193c13..fedffa22 100644 --- a/cuda/impl/psb_c_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_c_cp_elg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_coo + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_c_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_c_count_ell_from_coo -end subroutine psb_c_cp_elg_from_coo +end subroutine psb_c_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_c_cp_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_c_cp_elg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 index e8be8a8d..4c44d29a 100644 --- a/cuda/impl/psb_c_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_cp_elg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_fmt + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_c_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_c_cp_elg_from_fmt +end subroutine psb_c_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_c_cp_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_c_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 index f0ec00ad..436eabaa 100644 --- a/cuda/impl/psb_c_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_cp_hdiag_from_coo(a,b,info) +subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif implicit none - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_c_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_c_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_hdiag_from_coo +end subroutine psb_c_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_c_cp_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_c_cp_hlg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 index cf305592..d30fccbf 100644 --- a/cuda/impl/psb_c_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_cp_hlg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_c_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_c_cp_hlg_from_coo +end subroutine psb_c_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_c_cp_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_c_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 index 559c501c..259364cd 100644 --- a/cuda/impl/psb_c_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_cp_hlg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_fmt + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_c_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_hlg_from_fmt +end subroutine psb_c_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_c_cp_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_cp_hybg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 index 00a7d4ee..7ebb5240 100644 --- a/cuda/impl/psb_c_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_cp_hybg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_coo + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_c_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_hybg_from_coo +end subroutine psb_c_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_c_cp_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 index 643abf99..033ba966 100644 --- a/cuda/impl/psb_c_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_cp_hybg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_fmt + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_c_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_c_cp_hybg_from_fmt +end subroutine psb_c_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_c_csrg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_c_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 index 2183ee63..d9736d23 100644 --- a/cuda/impl/psb_c_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_allocate_mnnz + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_csrg_allocate_mnnz +end subroutine psb_c_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_c_csrg_csmm.F90 b/cuda/impl/psb_c_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_c_csrg_csmm.F90 rename to cuda/impl/psb_c_cuda_csrg_csmm.F90 index cef5d288..8f2f55b9 100644 --- a/cuda/impl/psb_c_csrg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_csmm + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_csrg_csmm +end subroutine psb_c_cuda_csrg_csmm diff --git a/cuda/impl/psb_c_csrg_csmv.F90 b/cuda/impl/psb_c_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_c_csrg_csmv.F90 rename to cuda/impl/psb_c_cuda_csrg_csmv.F90 index 3a543da3..ba681401 100644 --- a/cuda/impl/psb_c_csrg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_csmv + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmv #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_csrg_csmv' + character(len=20) :: name='c_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_csrg_csmv +end subroutine psb_c_cuda_csrg_csmv diff --git a/cuda/impl/psb_c_csrg_from_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_csrg_from_gpu.F90 rename to cuda/impl/psb_c_cuda_csrg_from_gpu.F90 index 606606bb..b1bed7e5 100644 --- a/cuda/impl/psb_c_csrg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_csrg_from_gpu(a,info) +subroutine psb_c_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_from_gpu + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_from_gpu #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_c_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_c_csrg_from_gpu +end subroutine psb_c_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_c_csrg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 index 39938752..32dec5ef 100644 --- a/cuda/impl/psb_c_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_inner_vect_sv + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='c_csrg_inner_vect_sv' + character(len=20) :: name='c_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_csrg_inner_vect_sv +end subroutine psb_c_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_c_csrg_mold.F90 b/cuda/impl/psb_c_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_c_csrg_mold.F90 rename to cuda/impl/psb_c_cuda_csrg_mold.F90 index 8b1b616a..405f2736 100644 --- a/cuda/impl/psb_c_csrg_mold.F90 +++ b/cuda/impl/psb_c_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_csrg_mold(a,b,info) +subroutine psb_c_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_mold + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_mold implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_csrg_mold(a,b,info) return -end subroutine psb_c_csrg_mold +end subroutine psb_c_cuda_csrg_mold diff --git a/cuda/impl/psb_c_csrg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_c_csrg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 index e9db4128..22f9f118 100644 --- a/cuda/impl/psb_c_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_c_csrg_reallocate_nz(nz,a) +subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_reallocate_nz + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_csrg_reallocate_nz' + character(len=20) :: name='c_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_c_csrg_reallocate_nz(nz,a) return -end subroutine psb_c_csrg_reallocate_nz +end subroutine psb_c_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_c_csrg_scal.F90 b/cuda/impl/psb_c_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_scal.F90 rename to cuda/impl/psb_c_cuda_csrg_scal.F90 index f183a822..556a0ec5 100644 --- a/cuda/impl/psb_c_csrg_scal.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_c_csrg_scal(d,a,info,side) +subroutine psb_c_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scal + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_c_csrg_scal(d,a,info,side) return -end subroutine psb_c_csrg_scal +end subroutine psb_c_cuda_csrg_scal diff --git a/cuda/impl/psb_c_csrg_scals.F90 b/cuda/impl/psb_c_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_scals.F90 rename to cuda/impl/psb_c_cuda_csrg_scals.F90 index 13f0d707..a67e91cd 100644 --- a/cuda/impl/psb_c_csrg_scals.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_c_csrg_scals(d,a,info) +subroutine psb_c_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scals + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_c_csrg_scals(d,a,info) return -end subroutine psb_c_csrg_scals +end subroutine psb_c_cuda_csrg_scals diff --git a/cuda/impl/psb_c_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_c_csrg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index a04f1bab..ea710cbc 100644 --- a/cuda/impl/psb_c_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_c_csrg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_to_gpu + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_c_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_c_csrg_to_gpu +end subroutine psb_c_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_c_csrg_vect_mv.F90 b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_csrg_vect_mv.F90 index 0feb03fd..cb556d20 100644 --- a/cuda/impl/psb_c_csrg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_vect_mv + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_csrg_vect_mv' + character(len=20) :: name='c_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= czero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_csrg_vect_mv +end subroutine psb_c_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_c_diag_csmv.F90 b/cuda/impl/psb_c_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_c_diag_csmv.F90 rename to cuda/impl/psb_c_cuda_diag_csmv.F90 index 05ca102f..00ab742d 100644 --- a/cuda/impl/psb_c_diag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_csmv + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='c_diag_csmv' + character(len=20) :: name='c_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_diag_csmv +end subroutine psb_c_cuda_diag_csmv diff --git a/cuda/impl/psb_c_diag_mold.F90 b/cuda/impl/psb_c_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_c_diag_mold.F90 rename to cuda/impl/psb_c_cuda_diag_mold.F90 index 8d79e78d..4c8a3c56 100644 --- a/cuda/impl/psb_c_diag_mold.F90 +++ b/cuda/impl/psb_c_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_diag_mold(a,b,info) +subroutine psb_c_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_mold + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_mold implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_diag_mold(a,b,info) return -end subroutine psb_c_diag_mold +end subroutine psb_c_cuda_diag_mold diff --git a/cuda/impl/psb_c_diag_to_gpu.F90 b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_diag_to_gpu.F90 rename to cuda/impl/psb_c_cuda_diag_to_gpu.F90 index a60fc741..4f2c21d9 100644 --- a/cuda/impl/psb_c_diag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_diag_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_to_gpu + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_c_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_c_diag_to_gpu +end subroutine psb_c_cuda_diag_to_gpu diff --git a/cuda/impl/psb_c_diag_vect_mv.F90 b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_c_diag_vect_mv.F90 rename to cuda/impl/psb_c_cuda_diag_vect_mv.F90 index e680a737..02bb9587 100644 --- a/cuda/impl/psb_c_diag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_vect_mv + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_diag_vect_mv' + character(len=20) :: name='c_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_diag_vect_mv +end subroutine psb_c_cuda_diag_vect_mv diff --git a/cuda/impl/psb_c_dnsg_mat_impl.F90 b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_c_dnsg_mat_impl.F90 rename to cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 index b70f383a..bb2ec97b 100644 --- a/cuda/impl/psb_c_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_vect_mv + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ complex(psb_spk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='c_dnsg_vect_mv' + character(len=20) :: name='c_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= czero) then @@ -117,21 +117,21 @@ subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_dnsg_vect_mv +end subroutine psb_c_cuda_dnsg_vect_mv -subroutine psb_c_dnsg_mold(a,b,info) +subroutine psb_c_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_mold + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_c_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_c_dnsg_mold(a,b,info) return -end subroutine psb_c_dnsg_mold +end subroutine psb_c_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_c_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_c_dnsg_to_gpu(a,info) +subroutine psb_c_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_to_gpu + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='c_dnsg_to_gpu' + character(len=20) :: name='c_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_c_dnsg_to_gpu(a,info) return -end subroutine psb_c_dnsg_to_gpu +end subroutine psb_c_cuda_dnsg_to_gpu -subroutine psb_c_cp_dnsg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_coo + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_dnsg_cp_from_coo' + character(len=20) :: name='c_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_c_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_c_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_c_cp_dnsg_from_coo +end subroutine psb_c_cuda_cp_dnsg_from_coo -subroutine psb_c_cp_dnsg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_fmt + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_c_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_dnsg_cp_from_fmt' + character(len=20) :: name='c_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_c_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_c_cp_dnsg_from_fmt +end subroutine psb_c_cuda_cp_dnsg_from_fmt -subroutine psb_c_mv_dnsg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_coo + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='c_dnsg_mv_from_coo' + character(len=20) :: name='c_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_c_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_c_mv_dnsg_from_coo +end subroutine psb_c_cuda_mv_dnsg_from_coo -subroutine psb_c_mv_dnsg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_fmt + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_c_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_dnsg_cp_from_fmt' + character(len=20) :: name='c_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_c_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_c_mv_dnsg_from_fmt +end subroutine psb_c_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_c_elg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_c_elg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 index ac9e654f..01ca7189 100644 --- a/cuda/impl/psb_c_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_allocate_mnnz + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_elg_allocate_mnnz +end subroutine psb_c_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_d_elg_asb.f90 b/cuda/impl/psb_c_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_d_elg_asb.f90 rename to cuda/impl/psb_c_cuda_elg_asb.f90 index f80537ef..24af1cc9 100644 --- a/cuda/impl/psb_d_elg_asb.f90 +++ b/cuda/impl/psb_c_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_d_elg_asb(a) +subroutine psb_c_cuda_elg_asb(a) use psb_base_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_asb + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_asb implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_d_elg_asb(a) return -end subroutine psb_d_elg_asb +end subroutine psb_c_cuda_elg_asb diff --git a/cuda/impl/psb_c_elg_csmm.F90 b/cuda/impl/psb_c_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_c_elg_csmm.F90 rename to cuda/impl/psb_c_cuda_elg_csmm.F90 index 5d355d88..a5f0e3d5 100644 --- a/cuda/impl/psb_c_elg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csmm + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_elg_csmm' + character(len=20) :: name='c_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_csmm +end subroutine psb_c_cuda_elg_csmm diff --git a/cuda/impl/psb_c_elg_csmv.F90 b/cuda/impl/psb_c_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_c_elg_csmv.F90 rename to cuda/impl/psb_c_cuda_elg_csmv.F90 index 9e377726..00f39e8c 100644 --- a/cuda/impl/psb_c_elg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csmv + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmv #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_csmv +end subroutine psb_c_cuda_elg_csmv diff --git a/cuda/impl/psb_c_elg_csput.F90 b/cuda/impl/psb_c_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_c_elg_csput.F90 rename to cuda/impl/psb_c_cuda_elg_csput.F90 index 2a632f21..cc6fc024 100644 --- a/cuda/impl/psb_c_elg_csput.F90 +++ b/cuda/impl/psb_c_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csput_a + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_a #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='c_elg_csput_a' + character(len=20) :: name='c_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_c_elg_csput_a +end subroutine psb_c_cuda_elg_csput_a -subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csput_v - use psb_c_gpu_vect_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v + use psb_c_cuda_vect_mod #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='c_elg_csput_v' + character(len=20) :: name='c_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_c_vect_gpu) + class is (psb_c_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_c_elg_csput_v +end subroutine psb_c_cuda_elg_csput_v diff --git a/cuda/impl/psb_s_elg_from_gpu.F90 b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_elg_from_gpu.F90 rename to cuda/impl/psb_c_cuda_elg_from_gpu.F90 index d043790d..593b52be 100644 --- a/cuda/impl/psb_s_elg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_from_gpu(a,info) +subroutine psb_c_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_from_gpu + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu #else - use psb_s_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_s_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_s_elg_from_gpu +end subroutine psb_c_cuda_elg_from_gpu diff --git a/cuda/impl/psb_c_elg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_c_elg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 index 97f0f7ff..43843dc6 100644 --- a/cuda/impl/psb_c_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_inner_vect_sv + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='c_elg_inner_vect_sv' + character(len=20) :: name='c_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. complex(psb_spk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_inner_vect_sv +end subroutine psb_c_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_c_elg_mold.F90 b/cuda/impl/psb_c_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_c_elg_mold.F90 rename to cuda/impl/psb_c_cuda_elg_mold.F90 index 17cd2ce2..b428055c 100644 --- a/cuda/impl/psb_c_elg_mold.F90 +++ b/cuda/impl/psb_c_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_elg_mold(a,b,info) +subroutine psb_c_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_mold + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_mold implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_elg_mold(a,b,info) return -end subroutine psb_c_elg_mold +end subroutine psb_c_cuda_elg_mold diff --git a/cuda/impl/psb_c_elg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_c_elg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 index 40d94d36..b97530e1 100644 --- a/cuda/impl/psb_c_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_c_elg_reallocate_nz(nz,a) +subroutine psb_c_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_reallocate_nz + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_elg_reallocate_nz' + character(len=20) :: name='c_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_c_elg_reallocate_nz(nz,a) return -end subroutine psb_c_elg_reallocate_nz +end subroutine psb_c_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_c_elg_scal.F90 b/cuda/impl/psb_c_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_c_elg_scal.F90 rename to cuda/impl/psb_c_cuda_elg_scal.F90 index 63d9907e..b169451b 100644 --- a/cuda/impl/psb_c_elg_scal.F90 +++ b/cuda/impl/psb_c_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_scal(d,a,info,side) +subroutine psb_c_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scal + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_c_elg_scal(d,a,info,side) return -end subroutine psb_c_elg_scal +end subroutine psb_c_cuda_elg_scal diff --git a/cuda/impl/psb_c_elg_scals.F90 b/cuda/impl/psb_c_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_c_elg_scals.F90 rename to cuda/impl/psb_c_cuda_elg_scals.F90 index b954e0a1..d20ee568 100644 --- a/cuda/impl/psb_c_elg_scals.F90 +++ b/cuda/impl/psb_c_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_scals(d,a,info) +subroutine psb_c_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scals + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_c_elg_scals(d,a,info) return -end subroutine psb_c_elg_scals +end subroutine psb_c_cuda_elg_scals diff --git a/cuda/impl/psb_c_elg_to_gpu.F90 b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_c_elg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_elg_to_gpu.F90 index b967a59b..5ea61a41 100644 --- a/cuda/impl/psb_c_elg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_to_gpu + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_c_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_c_elg_to_gpu +end subroutine psb_c_cuda_elg_to_gpu diff --git a/cuda/impl/psb_d_elg_trim.f90 b/cuda/impl/psb_c_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_d_elg_trim.f90 rename to cuda/impl/psb_c_cuda_elg_trim.f90 index d2a2047c..483e189d 100644 --- a/cuda/impl/psb_d_elg_trim.f90 +++ b/cuda/impl/psb_c_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_elg_trim(a) +subroutine psb_c_cuda_elg_trim(a) use psb_base_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_trim + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_trim implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_d_elg_trim(a) return -end subroutine psb_d_elg_trim +end subroutine psb_c_cuda_elg_trim diff --git a/cuda/impl/psb_c_elg_vect_mv.F90 b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_c_elg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_elg_vect_mv.F90 index ec6e5b50..b89ba5a2 100644 --- a/cuda/impl/psb_c_elg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_vect_mv + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_elg_vect_mv' + character(len=20) :: name='c_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= czero) then @@ -128,4 +128,4 @@ subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_vect_mv +end subroutine psb_c_cuda_elg_vect_mv diff --git a/cuda/impl/psb_c_hdiag_csmv.F90 b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_c_hdiag_csmv.F90 rename to cuda/impl/psb_c_cuda_hdiag_csmv.F90 index 1ba58c6f..4ea2c269 100644 --- a/cuda/impl/psb_c_hdiag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_csmv + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif implicit none - class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='c_hdiag_csmv' + character(len=20) :: name='c_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hdiag_csmv +end subroutine psb_c_cuda_hdiag_csmv diff --git a/cuda/impl/psb_c_hdiag_mold.F90 b/cuda/impl/psb_c_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_c_hdiag_mold.F90 rename to cuda/impl/psb_c_cuda_hdiag_mold.F90 index bc913690..67e0b92e 100644 --- a/cuda/impl/psb_c_hdiag_mold.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_hdiag_mold(a,b,info) +subroutine psb_c_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_mold + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_mold implicit none - class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_hdiag_mold(a,b,info) return -end subroutine psb_c_hdiag_mold +end subroutine psb_c_cuda_hdiag_mold diff --git a/cuda/impl/psb_c_hdiag_to_gpu.F90 b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_c_hdiag_to_gpu.F90 rename to cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 index 565babe0..63ab178a 100644 --- a/cuda/impl/psb_c_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hdiag_to_gpu(a,info) +subroutine psb_c_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_to_gpu + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_to_gpu #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_c_hdiag_to_gpu(a,info) #endif -end subroutine psb_c_hdiag_to_gpu +end subroutine psb_c_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_c_hdiag_vect_mv.F90 b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_c_hdiag_vect_mv.F90 rename to cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 index a891a274..fb80611f 100644 --- a/cuda/impl/psb_c_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_vect_mv + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_vect_mv #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hdiag_vect_mv' + character(len=20) :: name='c_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hdiag_vect_mv +end subroutine psb_c_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_c_hlg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_c_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 index 27e5c0b6..277b974f 100644 --- a/cuda/impl/psb_c_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_allocate_mnnz + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_allocate_mnnz #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_hlg_allocate_mnnz +end subroutine psb_c_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_c_hlg_csmm.F90 b/cuda/impl/psb_c_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_c_hlg_csmm.F90 rename to cuda/impl/psb_c_cuda_hlg_csmm.F90 index c33b2dde..f351ffd0 100644 --- a/cuda/impl/psb_c_hlg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_csmm + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmm #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hlg_csmm' + character(len=20) :: name='c_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_csmm +end subroutine psb_c_cuda_hlg_csmm diff --git a/cuda/impl/psb_c_hlg_csmv.F90 b/cuda/impl/psb_c_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_c_hlg_csmv.F90 rename to cuda/impl/psb_c_cuda_hlg_csmv.F90 index 2599e44e..d39e5f51 100644 --- a/cuda/impl/psb_c_hlg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_csmv + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmv #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='c_hlg_csmv' + character(len=20) :: name='c_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_csmv +end subroutine psb_c_cuda_hlg_csmv diff --git a/cuda/impl/psb_s_hlg_from_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_s_hlg_from_gpu.F90 rename to cuda/impl/psb_c_cuda_hlg_from_gpu.F90 index 1c34b15e..f823153d 100644 --- a/cuda/impl/psb_s_hlg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_from_gpu(a,info) +subroutine psb_c_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_from_gpu + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_from_gpu #else - use psb_s_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_s_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_s_hlg_from_gpu +end subroutine psb_c_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_c_hlg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_c_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 index 0955d8a1..6202885c 100644 --- a/cuda/impl/psb_c_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_inner_vect_sv + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_inner_vect_sv #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_inner_vect_sv +end subroutine psb_c_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_c_hlg_mold.F90 b/cuda/impl/psb_c_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_c_hlg_mold.F90 rename to cuda/impl/psb_c_cuda_hlg_mold.F90 index 321111f0..85453422 100644 --- a/cuda/impl/psb_c_hlg_mold.F90 +++ b/cuda/impl/psb_c_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_hlg_mold(a,b,info) +subroutine psb_c_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_mold + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_mold implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_c_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_hlg_mold +end subroutine psb_c_cuda_hlg_mold diff --git a/cuda/impl/psb_c_hlg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_c_hlg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 index a27c3f55..848b659d 100644 --- a/cuda/impl/psb_c_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_c_hlg_reallocate_nz(nz,a) +subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_reallocate_nz + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_reallocate_nz #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_hlg_reallocate_nz' + character(len=20) :: name='c_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_c_hlg_reallocate_nz(nz,a) return -end subroutine psb_c_hlg_reallocate_nz +end subroutine psb_c_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_c_hlg_scal.F90 b/cuda/impl/psb_c_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_scal.F90 rename to cuda/impl/psb_c_cuda_hlg_scal.F90 index b2c9d30d..d768048f 100644 --- a/cuda/impl/psb_c_hlg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_scal(d,a,info,side) +subroutine psb_c_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scal + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scal #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_c_hlg_scal(d,a,info,side) return -end subroutine psb_c_hlg_scal +end subroutine psb_c_cuda_hlg_scal diff --git a/cuda/impl/psb_c_hlg_scals.F90 b/cuda/impl/psb_c_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_scals.F90 rename to cuda/impl/psb_c_cuda_hlg_scals.F90 index af2efb19..7574bf94 100644 --- a/cuda/impl/psb_c_hlg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_scals(d,a,info) +subroutine psb_c_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scals + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scals #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_c_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_hlg_scals +end subroutine psb_c_cuda_hlg_scals diff --git a/cuda/impl/psb_c_hlg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_hlg_to_gpu.F90 index 0d37bc24..d7fc8fb2 100644 --- a/cuda/impl/psb_c_hlg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_to_gpu + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_to_gpu #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_c_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_c_hlg_to_gpu +end subroutine psb_c_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_c_hlg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_hlg_vect_mv.F90 index bc4e2f56..2d7a679e 100644 --- a/cuda/impl/psb_c_hlg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_vect_mv + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_vect_mv #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hlg_vect_mv' + character(len=20) :: name='c_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_vect_mv +end subroutine psb_c_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_c_hybg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_c_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 index 5cd57fa2..eced26e0 100644 --- a/cuda/impl/psb_c_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_allocate_mnnz + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_allocate_mnnz #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_hybg_allocate_mnnz +end subroutine psb_c_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_c_hybg_csmm.F90 b/cuda/impl/psb_c_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_c_hybg_csmm.F90 rename to cuda/impl/psb_c_cuda_hybg_csmm.F90 index 7c8bb582..cc459f66 100644 --- a/cuda/impl/psb_c_hybg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_csmm + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmm #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_csmm' + character(len=20) :: name='c_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_csmm +end subroutine psb_c_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_c_hybg_csmv.F90 b/cuda/impl/psb_c_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_c_hybg_csmv.F90 rename to cuda/impl/psb_c_cuda_hybg_csmv.F90 index 5e15bc1b..ab07d756 100644 --- a/cuda/impl/psb_c_hybg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_csmv + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmv #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_csmv' + character(len=20) :: name='c_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_csmv +end subroutine psb_c_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_c_hybg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_c_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 index 20bc842d..fcaf49ff 100644 --- a/cuda/impl/psb_c_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_inner_vect_sv + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_inner_vect_sv #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_inner_vect_sv' + character(len=20) :: name='c_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= czero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_inner_vect_sv +end subroutine psb_c_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_c_hybg_mold.F90 b/cuda/impl/psb_c_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_c_hybg_mold.F90 rename to cuda/impl/psb_c_cuda_hybg_mold.F90 index 54dd24c2..6fe4c378 100644 --- a/cuda/impl/psb_c_hybg_mold.F90 +++ b/cuda/impl/psb_c_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_mold(a,b,info) +subroutine psb_c_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_mold + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_mold implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_c_hybg_mold(a,b,info) return -end subroutine psb_c_hybg_mold +end subroutine psb_c_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_c_hybg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_c_hybg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 index 3272b797..979eaad8 100644 --- a/cuda/impl/psb_c_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_reallocate_nz(nz,a) +subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_reallocate_nz + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_reallocate_nz #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_hybg_reallocate_nz' + character(len=20) :: name='c_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_c_hybg_reallocate_nz(nz,a) return -end subroutine psb_c_hybg_reallocate_nz +end subroutine psb_c_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_c_hybg_scal.F90 b/cuda/impl/psb_c_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_c_hybg_scal.F90 rename to cuda/impl/psb_c_cuda_hybg_scal.F90 index 1019f979..ac4d788e 100644 --- a/cuda/impl/psb_c_hybg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_scal(d,a,info,side) +subroutine psb_c_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scal + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scal #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_c_hybg_scal(d,a,info,side) return -end subroutine psb_c_hybg_scal +end subroutine psb_c_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_c_hybg_scals.F90 b/cuda/impl/psb_c_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_c_hybg_scals.F90 rename to cuda/impl/psb_c_cuda_hybg_scals.F90 index 1d09abbb..7def71d2 100644 --- a/cuda/impl/psb_c_hybg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_scals(d,a,info) +subroutine psb_c_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scals + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scals #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_c_hybg_scals(d,a,info) return -end subroutine psb_c_hybg_scals +end subroutine psb_c_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_c_hybg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_c_hybg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_hybg_to_gpu.F90 index 107efba9..1a77586e 100644 --- a/cuda/impl/psb_c_hybg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_to_gpu + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_to_gpu #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_c_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_c_hybg_to_gpu +end subroutine psb_c_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_c_hybg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_c_hybg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_hybg_vect_mv.F90 index 3ed0f7fd..da20ca41 100644 --- a/cuda/impl/psb_c_hybg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_vect_mv + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_vect_mv #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_vect_mv' + character(len=20) :: name='c_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= czero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_vect_mv +end subroutine psb_c_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_c_mv_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_csrg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 index d2533c2d..f80a8f87 100644 --- a/cuda/impl/psb_c_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_mv_csrg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_coo + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_coo #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_c_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_csrg_from_coo +end subroutine psb_c_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_c_mv_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 index 3e898e8f..1f23a6c2 100644 --- a/cuda/impl/psb_c_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_mv_csrg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_fmt + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_fmt #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_c_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_c_mv_csrg_from_fmt +end subroutine psb_c_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_c_mv_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_diag_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 index 34fe69b7..e20e0b0a 100644 --- a/cuda/impl/psb_c_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_mv_diag_from_coo(a,b,info) +subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_mv_diag_from_coo + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_mv_diag_from_coo #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif implicit none - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_c_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_diag_from_coo +end subroutine psb_c_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_c_mv_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_elg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 index acf7e28c..741058cd 100644 --- a/cuda/impl/psb_c_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_mv_elg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_coo + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_coo #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_c_mv_elg_from_coo(a,b,info) return -end subroutine psb_c_mv_elg_from_coo +end subroutine psb_c_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_c_mv_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_c_mv_elg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 index fb9e3cfe..b375bc63 100644 --- a/cuda/impl/psb_c_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_mv_elg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_fmt + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_fmt #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_c_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_c_mv_elg_from_fmt +end subroutine psb_c_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_c_mv_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_c_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 index 1d07bddb..8826081f 100644 --- a/cuda/impl/psb_c_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_c_mv_hdiag_from_coo(a,b,info) +subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif implicit none - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_c_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_c_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_c_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_hdiag_from_coo +end subroutine psb_c_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_c_mv_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_c_mv_hlg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 index 0fa2d72d..416bbaed 100644 --- a/cuda/impl/psb_c_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_mv_hlg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_coo #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_c_mv_hlg_from_coo(a,b,info) return -end subroutine psb_c_mv_hlg_from_coo +end subroutine psb_c_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_c_mv_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 index 0581c7d6..aafe692d 100644 --- a/cuda/impl/psb_c_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_mv_hlg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_fmt + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_fmt #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_c_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_c_mv_hlg_from_fmt +end subroutine psb_c_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_c_mv_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_hybg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 index 7aca6065..eb5ba685 100644 --- a/cuda/impl/psb_c_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_mv_hybg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_coo + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_coo #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_c_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_hybg_from_coo +end subroutine psb_c_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_c_mv_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 index 41581b85..d74e89bd 100644 --- a/cuda/impl/psb_c_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_mv_hybg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_fmt + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_fmt #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_c_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_c_mv_hybg_from_fmt +end subroutine psb_c_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_d_cp_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_cp_csrg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 index ec00007e..e3383af1 100644 --- a/cuda/impl/psb_d_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_cp_csrg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_coo + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_coo #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_d_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_csrg_from_coo +end subroutine psb_d_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_d_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 index b3aabeed..28b46c76 100644 --- a/cuda/impl/psb_d_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_cp_csrg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_fmt + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_fmt #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_d_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_d_cp_csrg_from_fmt +end subroutine psb_d_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_d_cp_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_cp_diag_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 index 06aff19d..d21bb469 100644 --- a/cuda/impl/psb_d_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_cp_diag_from_coo(a,b,info) +subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_cp_diag_from_coo + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_cp_diag_from_coo #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif implicit none - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_d_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_diag_from_coo +end subroutine psb_d_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_d_cp_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_d_cp_elg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 index 381e4bfb..a4d58297 100644 --- a/cuda/impl/psb_d_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_d_cp_elg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_coo + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_d_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_d_count_ell_from_coo -end subroutine psb_d_cp_elg_from_coo +end subroutine psb_d_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_d_cp_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_d_cp_elg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 index 9a6b6d41..31786c1b 100644 --- a/cuda/impl/psb_d_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_cp_elg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_fmt + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_fmt #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_d_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_d_cp_elg_from_fmt +end subroutine psb_d_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_d_cp_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_d_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 index 443452a1..efcf9d66 100644 --- a/cuda/impl/psb_d_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_cp_hdiag_from_coo(a,b,info) +subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif implicit none - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_d_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_d_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_d_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_hdiag_from_coo +end subroutine psb_d_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_d_cp_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_d_cp_hlg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 index 02855fef..2fc898b2 100644 --- a/cuda/impl/psb_d_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_cp_hlg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_coo #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_d_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_d_cp_hlg_from_coo +end subroutine psb_d_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_d_cp_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_d_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 index 133fbb32..0796630c 100644 --- a/cuda/impl/psb_d_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_cp_hlg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_fmt + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_fmt #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_d_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_hlg_from_fmt +end subroutine psb_d_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_d_cp_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_cp_hybg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 index a74409cb..f1f62a89 100644 --- a/cuda/impl/psb_d_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_cp_hybg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_coo + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_coo #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_d_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_hybg_from_coo +end subroutine psb_d_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_d_cp_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 index 91d59060..37c9cc42 100644 --- a/cuda/impl/psb_d_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_cp_hybg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_fmt + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_fmt #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_d_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_d_cp_hybg_from_fmt +end subroutine psb_d_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_d_csrg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_d_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 index 7d2d4470..3858672c 100644 --- a/cuda/impl/psb_d_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_allocate_mnnz + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_allocate_mnnz #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_csrg_allocate_mnnz +end subroutine psb_d_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_d_csrg_csmm.F90 b/cuda/impl/psb_d_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_d_csrg_csmm.F90 rename to cuda/impl/psb_d_cuda_csrg_csmm.F90 index 59c8343e..58251d9a 100644 --- a/cuda/impl/psb_d_csrg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_csmm + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmm #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_csrg_csmm +end subroutine psb_d_cuda_csrg_csmm diff --git a/cuda/impl/psb_d_csrg_csmv.F90 b/cuda/impl/psb_d_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_d_csrg_csmv.F90 rename to cuda/impl/psb_d_cuda_csrg_csmv.F90 index 44a6428b..269760f0 100644 --- a/cuda/impl/psb_d_csrg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_csmv + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmv #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_csrg_csmv' + character(len=20) :: name='d_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_csrg_csmv +end subroutine psb_d_cuda_csrg_csmv diff --git a/cuda/impl/psb_s_csrg_from_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_csrg_from_gpu.F90 rename to cuda/impl/psb_d_cuda_csrg_from_gpu.F90 index 23748d97..c451a99f 100644 --- a/cuda/impl/psb_s_csrg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_csrg_from_gpu(a,info) +subroutine psb_d_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_from_gpu + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_from_gpu #else - use psb_s_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_s_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_s_csrg_from_gpu +end subroutine psb_d_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_d_csrg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 index 016d63d6..60ee541f 100644 --- a/cuda/impl/psb_d_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_inner_vect_sv + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_inner_vect_sv #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='d_csrg_inner_vect_sv' + character(len=20) :: name='d_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_csrg_inner_vect_sv +end subroutine psb_d_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_d_csrg_mold.F90 b/cuda/impl/psb_d_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_d_csrg_mold.F90 rename to cuda/impl/psb_d_cuda_csrg_mold.F90 index d7288868..eec34975 100644 --- a/cuda/impl/psb_d_csrg_mold.F90 +++ b/cuda/impl/psb_d_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_csrg_mold(a,b,info) +subroutine psb_d_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_mold + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_mold implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_csrg_mold(a,b,info) return -end subroutine psb_d_csrg_mold +end subroutine psb_d_cuda_csrg_mold diff --git a/cuda/impl/psb_d_csrg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_d_csrg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 index 083091f5..dbf34958 100644 --- a/cuda/impl/psb_d_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_d_csrg_reallocate_nz(nz,a) +subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_reallocate_nz + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_reallocate_nz #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_csrg_reallocate_nz' + character(len=20) :: name='d_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_d_csrg_reallocate_nz(nz,a) return -end subroutine psb_d_csrg_reallocate_nz +end subroutine psb_d_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_d_csrg_scal.F90 b/cuda/impl/psb_d_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_scal.F90 rename to cuda/impl/psb_d_cuda_csrg_scal.F90 index 60dbaecd..73e1b9f3 100644 --- a/cuda/impl/psb_d_csrg_scal.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_d_csrg_scal(d,a,info,side) +subroutine psb_d_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scal + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scal #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_d_csrg_scal(d,a,info,side) return -end subroutine psb_d_csrg_scal +end subroutine psb_d_cuda_csrg_scal diff --git a/cuda/impl/psb_d_csrg_scals.F90 b/cuda/impl/psb_d_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_scals.F90 rename to cuda/impl/psb_d_cuda_csrg_scals.F90 index 6d4a1f40..cf8d6270 100644 --- a/cuda/impl/psb_d_csrg_scals.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_d_csrg_scals(d,a,info) +subroutine psb_d_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scals + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scals #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_d_csrg_scals(d,a,info) return -end subroutine psb_d_csrg_scals +end subroutine psb_d_cuda_csrg_scals diff --git a/cuda/impl/psb_d_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_d_csrg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index eb5d3942..a0e72cb4 100644 --- a/cuda/impl/psb_d_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_d_csrg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_to_gpu + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_to_gpu #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_d_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_d_csrg_to_gpu +end subroutine psb_d_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_d_csrg_vect_mv.F90 b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_csrg_vect_mv.F90 index f7124bbb..b828d878 100644 --- a/cuda/impl/psb_d_csrg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_vect_mv + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_vect_mv #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_csrg_vect_mv' + character(len=20) :: name='d_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_csrg_vect_mv +end subroutine psb_d_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_d_diag_csmv.F90 b/cuda/impl/psb_d_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_d_diag_csmv.F90 rename to cuda/impl/psb_d_cuda_diag_csmv.F90 index af9ad2db..8b49769e 100644 --- a/cuda/impl/psb_d_diag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_csmv + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_csmv #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='d_diag_csmv' + character(len=20) :: name='d_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_diag_csmv +end subroutine psb_d_cuda_diag_csmv diff --git a/cuda/impl/psb_d_diag_mold.F90 b/cuda/impl/psb_d_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_d_diag_mold.F90 rename to cuda/impl/psb_d_cuda_diag_mold.F90 index 4b0d066a..7cea069e 100644 --- a/cuda/impl/psb_d_diag_mold.F90 +++ b/cuda/impl/psb_d_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_diag_mold(a,b,info) +subroutine psb_d_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_mold + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_mold implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_diag_mold(a,b,info) return -end subroutine psb_d_diag_mold +end subroutine psb_d_cuda_diag_mold diff --git a/cuda/impl/psb_d_diag_to_gpu.F90 b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_diag_to_gpu.F90 rename to cuda/impl/psb_d_cuda_diag_to_gpu.F90 index de244124..4903de8a 100644 --- a/cuda/impl/psb_d_diag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_diag_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_to_gpu + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_to_gpu #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_d_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_d_diag_to_gpu +end subroutine psb_d_cuda_diag_to_gpu diff --git a/cuda/impl/psb_d_diag_vect_mv.F90 b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_diag_vect_mv.F90 rename to cuda/impl/psb_d_cuda_diag_vect_mv.F90 index 3f2f5ac6..0f23d363 100644 --- a/cuda/impl/psb_d_diag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_vect_mv + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_vect_mv #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_diag_vect_mv' + character(len=20) :: name='d_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_diag_vect_mv +end subroutine psb_d_cuda_diag_vect_mv diff --git a/cuda/impl/psb_d_dnsg_mat_impl.F90 b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_d_dnsg_mat_impl.F90 rename to cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 index a7915898..8d922d82 100644 --- a/cuda/impl/psb_d_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_vect_mv + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_vect_mv #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ real(psb_dpk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='d_dnsg_vect_mv' + character(len=20) :: name='d_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= dzero) then @@ -117,21 +117,21 @@ subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_dnsg_vect_mv +end subroutine psb_d_cuda_dnsg_vect_mv -subroutine psb_d_dnsg_mold(a,b,info) +subroutine psb_d_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_mold + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_mold #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_d_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_d_dnsg_mold(a,b,info) return -end subroutine psb_d_dnsg_mold +end subroutine psb_d_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_d_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end subroutine psb_d_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_d_dnsg_to_gpu(a,info) +subroutine psb_d_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_to_gpu + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_to_gpu #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='d_dnsg_to_gpu' + character(len=20) :: name='d_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_d_dnsg_to_gpu(a,info) return -end subroutine psb_d_dnsg_to_gpu +end subroutine psb_d_cuda_dnsg_to_gpu -subroutine psb_d_cp_dnsg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_coo + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_coo #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_dnsg_cp_from_coo' + character(len=20) :: name='d_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_d_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_d_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_d_cp_dnsg_from_coo +end subroutine psb_d_cuda_cp_dnsg_from_coo -subroutine psb_d_cp_dnsg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_fmt + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_fmt #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_d_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_dnsg_cp_from_fmt' + character(len=20) :: name='d_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_d_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_d_cp_dnsg_from_fmt +end subroutine psb_d_cuda_cp_dnsg_from_fmt -subroutine psb_d_mv_dnsg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_coo + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_coo #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='d_dnsg_mv_from_coo' + character(len=20) :: name='d_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_d_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_d_mv_dnsg_from_coo +end subroutine psb_d_cuda_mv_dnsg_from_coo -subroutine psb_d_mv_dnsg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_fmt + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_fmt #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_d_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_dnsg_cp_from_fmt' + character(len=20) :: name='d_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_d_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_d_mv_dnsg_from_fmt +end subroutine psb_d_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_d_elg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_d_elg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 index 105f5617..b9308514 100644 --- a/cuda/impl/psb_d_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_allocate_mnnz + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_allocate_mnnz #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_elg_allocate_mnnz +end subroutine psb_d_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_s_elg_asb.f90 b/cuda/impl/psb_d_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_s_elg_asb.f90 rename to cuda/impl/psb_d_cuda_elg_asb.f90 index 190be710..c158ccde 100644 --- a/cuda/impl/psb_s_elg_asb.f90 +++ b/cuda/impl/psb_d_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_s_elg_asb(a) +subroutine psb_d_cuda_elg_asb(a) use psb_base_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_asb + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_asb implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_s_elg_asb(a) return -end subroutine psb_s_elg_asb +end subroutine psb_d_cuda_elg_asb diff --git a/cuda/impl/psb_d_elg_csmm.F90 b/cuda/impl/psb_d_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_d_elg_csmm.F90 rename to cuda/impl/psb_d_cuda_elg_csmm.F90 index add9c3b2..2d9883fa 100644 --- a/cuda/impl/psb_d_elg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csmm + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmm #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_elg_csmm' + character(len=20) :: name='d_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_csmm +end subroutine psb_d_cuda_elg_csmm diff --git a/cuda/impl/psb_d_elg_csmv.F90 b/cuda/impl/psb_d_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_d_elg_csmv.F90 rename to cuda/impl/psb_d_cuda_elg_csmv.F90 index 5237fb75..6420e28d 100644 --- a/cuda/impl/psb_d_elg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csmv + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmv #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_csmv +end subroutine psb_d_cuda_elg_csmv diff --git a/cuda/impl/psb_d_elg_csput.F90 b/cuda/impl/psb_d_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_d_elg_csput.F90 rename to cuda/impl/psb_d_cuda_elg_csput.F90 index 107c69d0..19d26c43 100644 --- a/cuda/impl/psb_d_elg_csput.F90 +++ b/cuda/impl/psb_d_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csput_a + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_a #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='d_elg_csput_a' + character(len=20) :: name='d_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_d_elg_csput_a +end subroutine psb_d_cuda_elg_csput_a -subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csput_v - use psb_d_gpu_vect_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_v + use psb_d_cuda_vect_mod #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='d_elg_csput_v' + character(len=20) :: name='d_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_d_vect_gpu) + class is (psb_d_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_d_elg_csput_v +end subroutine psb_d_cuda_elg_csput_v diff --git a/cuda/impl/psb_d_elg_from_gpu.F90 b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_elg_from_gpu.F90 rename to cuda/impl/psb_d_cuda_elg_from_gpu.F90 index c1da9584..b532a83c 100644 --- a/cuda/impl/psb_d_elg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_from_gpu(a,info) +subroutine psb_d_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_from_gpu + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_from_gpu #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_d_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_d_elg_from_gpu +end subroutine psb_d_cuda_elg_from_gpu diff --git a/cuda/impl/psb_d_elg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_d_elg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 index 333946bf..c262969f 100644 --- a/cuda/impl/psb_d_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_inner_vect_sv + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_inner_vect_sv #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='d_elg_inner_vect_sv' + character(len=20) :: name='d_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. real(psb_dpk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_inner_vect_sv +end subroutine psb_d_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_d_elg_mold.F90 b/cuda/impl/psb_d_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_d_elg_mold.F90 rename to cuda/impl/psb_d_cuda_elg_mold.F90 index 3fd6d071..f887f96f 100644 --- a/cuda/impl/psb_d_elg_mold.F90 +++ b/cuda/impl/psb_d_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_elg_mold(a,b,info) +subroutine psb_d_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_mold + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_mold implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_elg_mold(a,b,info) return -end subroutine psb_d_elg_mold +end subroutine psb_d_cuda_elg_mold diff --git a/cuda/impl/psb_d_elg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_d_elg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 index 70b3705c..66c583e1 100644 --- a/cuda/impl/psb_d_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_d_elg_reallocate_nz(nz,a) +subroutine psb_d_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_reallocate_nz + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_reallocate_nz #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_elg_reallocate_nz' + character(len=20) :: name='d_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_d_elg_reallocate_nz(nz,a) return -end subroutine psb_d_elg_reallocate_nz +end subroutine psb_d_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_d_elg_scal.F90 b/cuda/impl/psb_d_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_d_elg_scal.F90 rename to cuda/impl/psb_d_cuda_elg_scal.F90 index 53ab82d7..7aa21c93 100644 --- a/cuda/impl/psb_d_elg_scal.F90 +++ b/cuda/impl/psb_d_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_scal(d,a,info,side) +subroutine psb_d_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scal + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scal #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_d_elg_scal(d,a,info,side) return -end subroutine psb_d_elg_scal +end subroutine psb_d_cuda_elg_scal diff --git a/cuda/impl/psb_d_elg_scals.F90 b/cuda/impl/psb_d_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_d_elg_scals.F90 rename to cuda/impl/psb_d_cuda_elg_scals.F90 index f85780ce..1950b366 100644 --- a/cuda/impl/psb_d_elg_scals.F90 +++ b/cuda/impl/psb_d_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_scals(d,a,info) +subroutine psb_d_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scals + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scals #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_d_elg_scals(d,a,info) return -end subroutine psb_d_elg_scals +end subroutine psb_d_cuda_elg_scals diff --git a/cuda/impl/psb_d_elg_to_gpu.F90 b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_d_elg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_elg_to_gpu.F90 index 28e61606..b589ec2d 100644 --- a/cuda/impl/psb_d_elg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_to_gpu + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_to_gpu #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_d_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_d_elg_to_gpu +end subroutine psb_d_cuda_elg_to_gpu diff --git a/cuda/impl/psb_s_elg_trim.f90 b/cuda/impl/psb_d_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_s_elg_trim.f90 rename to cuda/impl/psb_d_cuda_elg_trim.f90 index f3bd3b2f..be573c8c 100644 --- a/cuda/impl/psb_s_elg_trim.f90 +++ b/cuda/impl/psb_d_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_elg_trim(a) +subroutine psb_d_cuda_elg_trim(a) use psb_base_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_trim + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_trim implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_s_elg_trim(a) return -end subroutine psb_s_elg_trim +end subroutine psb_d_cuda_elg_trim diff --git a/cuda/impl/psb_d_elg_vect_mv.F90 b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_d_elg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_elg_vect_mv.F90 index e46f84da..1be57d22 100644 --- a/cuda/impl/psb_d_elg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_vect_mv + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_vect_mv #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_elg_vect_mv' + character(len=20) :: name='d_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= dzero) then @@ -128,4 +128,4 @@ subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_vect_mv +end subroutine psb_d_cuda_elg_vect_mv diff --git a/cuda/impl/psb_d_hdiag_csmv.F90 b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_d_hdiag_csmv.F90 rename to cuda/impl/psb_d_cuda_hdiag_csmv.F90 index 6f6bcedf..4bcd6e7a 100644 --- a/cuda/impl/psb_d_hdiag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_csmv + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_csmv #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif implicit none - class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='d_hdiag_csmv' + character(len=20) :: name='d_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hdiag_csmv +end subroutine psb_d_cuda_hdiag_csmv diff --git a/cuda/impl/psb_d_hdiag_mold.F90 b/cuda/impl/psb_d_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_d_hdiag_mold.F90 rename to cuda/impl/psb_d_cuda_hdiag_mold.F90 index b6c254e9..c5028c07 100644 --- a/cuda/impl/psb_d_hdiag_mold.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_hdiag_mold(a,b,info) +subroutine psb_d_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_mold + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_mold implicit none - class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_hdiag_mold(a,b,info) return -end subroutine psb_d_hdiag_mold +end subroutine psb_d_cuda_hdiag_mold diff --git a/cuda/impl/psb_s_hdiag_to_gpu.F90 b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_s_hdiag_to_gpu.F90 rename to cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 index ade1c080..ca79b9fa 100644 --- a/cuda/impl/psb_s_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hdiag_to_gpu(a,info) +subroutine psb_d_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_to_gpu + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_to_gpu #else - use psb_s_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_s_hdiag_to_gpu(a,info) #endif -end subroutine psb_s_hdiag_to_gpu +end subroutine psb_d_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_d_hdiag_vect_mv.F90 b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_hdiag_vect_mv.F90 rename to cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 index db7ec9c6..74233f90 100644 --- a/cuda/impl/psb_d_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_vect_mv + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_vect_mv #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hdiag_vect_mv' + character(len=20) :: name='d_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hdiag_vect_mv +end subroutine psb_d_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_d_hlg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_d_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 index 6f327e81..3382327f 100644 --- a/cuda/impl/psb_d_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_allocate_mnnz + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_allocate_mnnz #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_hlg_allocate_mnnz +end subroutine psb_d_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_d_hlg_csmm.F90 b/cuda/impl/psb_d_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_d_hlg_csmm.F90 rename to cuda/impl/psb_d_cuda_hlg_csmm.F90 index 120f3e06..a223aace 100644 --- a/cuda/impl/psb_d_hlg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_csmm + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmm #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hlg_csmm' + character(len=20) :: name='d_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_csmm +end subroutine psb_d_cuda_hlg_csmm diff --git a/cuda/impl/psb_d_hlg_csmv.F90 b/cuda/impl/psb_d_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_d_hlg_csmv.F90 rename to cuda/impl/psb_d_cuda_hlg_csmv.F90 index 4584826d..04779296 100644 --- a/cuda/impl/psb_d_hlg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_csmv + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmv #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='d_hlg_csmv' + character(len=20) :: name='d_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_csmv +end subroutine psb_d_cuda_hlg_csmv diff --git a/cuda/impl/psb_d_hlg_from_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_d_hlg_from_gpu.F90 rename to cuda/impl/psb_d_cuda_hlg_from_gpu.F90 index eec714f4..7c1a2de8 100644 --- a/cuda/impl/psb_d_hlg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_from_gpu(a,info) +subroutine psb_d_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_from_gpu + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_from_gpu #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_d_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_d_hlg_from_gpu +end subroutine psb_d_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_d_hlg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_d_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 index 0ad867a3..c6bd68b5 100644 --- a/cuda/impl/psb_d_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_inner_vect_sv + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_inner_vect_sv #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_inner_vect_sv +end subroutine psb_d_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_d_hlg_mold.F90 b/cuda/impl/psb_d_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_d_hlg_mold.F90 rename to cuda/impl/psb_d_cuda_hlg_mold.F90 index 3ce9f33a..dddce134 100644 --- a/cuda/impl/psb_d_hlg_mold.F90 +++ b/cuda/impl/psb_d_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_hlg_mold(a,b,info) +subroutine psb_d_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_mold + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_mold implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_d_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_hlg_mold +end subroutine psb_d_cuda_hlg_mold diff --git a/cuda/impl/psb_d_hlg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_d_hlg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 index c9fa4771..aa2954d6 100644 --- a/cuda/impl/psb_d_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_d_hlg_reallocate_nz(nz,a) +subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_reallocate_nz + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_reallocate_nz #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_hlg_reallocate_nz' + character(len=20) :: name='d_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_d_hlg_reallocate_nz(nz,a) return -end subroutine psb_d_hlg_reallocate_nz +end subroutine psb_d_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_d_hlg_scal.F90 b/cuda/impl/psb_d_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_scal.F90 rename to cuda/impl/psb_d_cuda_hlg_scal.F90 index b487303d..3cbfada0 100644 --- a/cuda/impl/psb_d_hlg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_scal(d,a,info,side) +subroutine psb_d_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scal + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scal #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_d_hlg_scal(d,a,info,side) return -end subroutine psb_d_hlg_scal +end subroutine psb_d_cuda_hlg_scal diff --git a/cuda/impl/psb_d_hlg_scals.F90 b/cuda/impl/psb_d_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_scals.F90 rename to cuda/impl/psb_d_cuda_hlg_scals.F90 index e3f676e9..1ddf764f 100644 --- a/cuda/impl/psb_d_hlg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_scals(d,a,info) +subroutine psb_d_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scals + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scals #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_d_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_hlg_scals +end subroutine psb_d_cuda_hlg_scals diff --git a/cuda/impl/psb_d_hlg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_hlg_to_gpu.F90 index 5e3b3558..82737315 100644 --- a/cuda/impl/psb_d_hlg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_to_gpu + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_to_gpu #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_d_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_d_hlg_to_gpu +end subroutine psb_d_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_d_hlg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_hlg_vect_mv.F90 index cd5e95e5..9d0741c4 100644 --- a/cuda/impl/psb_d_hlg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_vect_mv + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_vect_mv #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hlg_vect_mv' + character(len=20) :: name='d_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_vect_mv +end subroutine psb_d_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_d_hybg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_d_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 index 1565a719..b0bff6c0 100644 --- a/cuda/impl/psb_d_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_allocate_mnnz + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_allocate_mnnz #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_hybg_allocate_mnnz +end subroutine psb_d_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_d_hybg_csmm.F90 b/cuda/impl/psb_d_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_d_hybg_csmm.F90 rename to cuda/impl/psb_d_cuda_hybg_csmm.F90 index abc0e0c2..3fcfd17f 100644 --- a/cuda/impl/psb_d_hybg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_csmm + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmm #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_csmm' + character(len=20) :: name='d_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_csmm +end subroutine psb_d_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_d_hybg_csmv.F90 b/cuda/impl/psb_d_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_d_hybg_csmv.F90 rename to cuda/impl/psb_d_cuda_hybg_csmv.F90 index c636ec8a..5e06f633 100644 --- a/cuda/impl/psb_d_hybg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_csmv + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmv #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_csmv' + character(len=20) :: name='d_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_csmv +end subroutine psb_d_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_d_hybg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_d_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 index 82b536ca..a30c1abe 100644 --- a/cuda/impl/psb_d_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_inner_vect_sv + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_inner_vect_sv #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_inner_vect_sv' + character(len=20) :: name='d_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_inner_vect_sv +end subroutine psb_d_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_d_hybg_mold.F90 b/cuda/impl/psb_d_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_d_hybg_mold.F90 rename to cuda/impl/psb_d_cuda_hybg_mold.F90 index 27390db0..fa239696 100644 --- a/cuda/impl/psb_d_hybg_mold.F90 +++ b/cuda/impl/psb_d_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_mold(a,b,info) +subroutine psb_d_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_mold + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_mold implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_d_hybg_mold(a,b,info) return -end subroutine psb_d_hybg_mold +end subroutine psb_d_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_d_hybg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_d_hybg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 index 537101e9..cadce8d3 100644 --- a/cuda/impl/psb_d_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_reallocate_nz(nz,a) +subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_reallocate_nz + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_reallocate_nz #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_hybg_reallocate_nz' + character(len=20) :: name='d_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_d_hybg_reallocate_nz(nz,a) return -end subroutine psb_d_hybg_reallocate_nz +end subroutine psb_d_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_d_hybg_scal.F90 b/cuda/impl/psb_d_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_d_hybg_scal.F90 rename to cuda/impl/psb_d_cuda_hybg_scal.F90 index 32ef2da0..126e25cb 100644 --- a/cuda/impl/psb_d_hybg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_scal(d,a,info,side) +subroutine psb_d_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scal + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scal #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_d_hybg_scal(d,a,info,side) return -end subroutine psb_d_hybg_scal +end subroutine psb_d_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_d_hybg_scals.F90 b/cuda/impl/psb_d_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_d_hybg_scals.F90 rename to cuda/impl/psb_d_cuda_hybg_scals.F90 index 8c38328a..88b7e05c 100644 --- a/cuda/impl/psb_d_hybg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_scals(d,a,info) +subroutine psb_d_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scals + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scals #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_d_hybg_scals(d,a,info) return -end subroutine psb_d_hybg_scals +end subroutine psb_d_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_d_hybg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_d_hybg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_hybg_to_gpu.F90 index 33bf55b8..d94a75c7 100644 --- a/cuda/impl/psb_d_hybg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_to_gpu + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_to_gpu #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_d_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_d_hybg_to_gpu +end subroutine psb_d_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_d_hybg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_hybg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_hybg_vect_mv.F90 index d9653a48..9d0aedb7 100644 --- a/cuda/impl/psb_d_hybg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_vect_mv + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_vect_mv #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_vect_mv' + character(len=20) :: name='d_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_vect_mv +end subroutine psb_d_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_d_mv_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_csrg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 index 8c59e6d1..18e7c636 100644 --- a/cuda/impl/psb_d_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_mv_csrg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_coo + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_coo #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_d_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_csrg_from_coo +end subroutine psb_d_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_d_mv_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 index 30c133e4..837c78c1 100644 --- a/cuda/impl/psb_d_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_mv_csrg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_fmt + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_fmt #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_d_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_d_mv_csrg_from_fmt +end subroutine psb_d_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_d_mv_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_diag_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 index f37a5523..8d33c459 100644 --- a/cuda/impl/psb_d_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_mv_diag_from_coo(a,b,info) +subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_mv_diag_from_coo + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_mv_diag_from_coo #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif implicit none - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_d_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_diag_from_coo +end subroutine psb_d_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_d_mv_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_elg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 index 73216cfa..ad9e7f10 100644 --- a/cuda/impl/psb_d_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_mv_elg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_coo + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_coo #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_d_mv_elg_from_coo(a,b,info) return -end subroutine psb_d_mv_elg_from_coo +end subroutine psb_d_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_d_mv_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_d_mv_elg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 index 5038c50e..9cdf790e 100644 --- a/cuda/impl/psb_d_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_mv_elg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_fmt + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_fmt #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_d_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_d_mv_elg_from_fmt +end subroutine psb_d_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_d_mv_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_d_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 index ee0e983f..aff5e0c0 100644 --- a/cuda/impl/psb_d_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_d_mv_hdiag_from_coo(a,b,info) +subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif implicit none - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_d_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_d_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_d_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_hdiag_from_coo +end subroutine psb_d_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_d_mv_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_d_mv_hlg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 index fe030415..a2b358c4 100644 --- a/cuda/impl/psb_d_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_mv_hlg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_coo #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_d_mv_hlg_from_coo(a,b,info) return -end subroutine psb_d_mv_hlg_from_coo +end subroutine psb_d_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_d_mv_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 index e538b017..130d88c2 100644 --- a/cuda/impl/psb_d_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_mv_hlg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_fmt + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_fmt #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_d_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_d_mv_hlg_from_fmt +end subroutine psb_d_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_d_mv_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_hybg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 index 4fe76c72..8b0ad032 100644 --- a/cuda/impl/psb_d_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_mv_hybg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_coo + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_coo #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_d_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_hybg_from_coo +end subroutine psb_d_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_d_mv_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 index 454533d0..71badfc5 100644 --- a/cuda/impl/psb_d_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_mv_hybg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_fmt + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_fmt #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_d_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_d_mv_hybg_from_fmt +end subroutine psb_d_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_s_cp_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_cp_csrg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 index 4a714d41..b7bebc95 100644 --- a/cuda/impl/psb_s_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_cp_csrg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_coo + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_coo #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_s_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_csrg_from_coo +end subroutine psb_s_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_s_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 index 962a8c9d..7ab9283d 100644 --- a/cuda/impl/psb_s_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_cp_csrg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_fmt + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_fmt #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_s_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_s_cp_csrg_from_fmt +end subroutine psb_s_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_s_cp_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_cp_diag_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 index 6b105ef2..9f038a09 100644 --- a/cuda/impl/psb_s_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_cp_diag_from_coo(a,b,info) +subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_cp_diag_from_coo + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_cp_diag_from_coo #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif implicit none - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_s_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_diag_from_coo +end subroutine psb_s_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_s_cp_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_s_cp_elg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 index af8c7d28..f6e1ba42 100644 --- a/cuda/impl/psb_s_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_s_cp_elg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_coo + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_s_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_s_count_ell_from_coo -end subroutine psb_s_cp_elg_from_coo +end subroutine psb_s_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_s_cp_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_s_cp_elg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 index c3d973e1..0c811426 100644 --- a/cuda/impl/psb_s_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_cp_elg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_fmt + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_fmt #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_s_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_s_cp_elg_from_fmt +end subroutine psb_s_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_s_cp_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_s_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 index 0509706d..07b56fa6 100644 --- a/cuda/impl/psb_s_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_cp_hdiag_from_coo(a,b,info) +subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif implicit none - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_s_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_s_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_s_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_hdiag_from_coo +end subroutine psb_s_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_s_cp_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_s_cp_hlg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 index 5988c8dd..055fa046 100644 --- a/cuda/impl/psb_s_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_cp_hlg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_coo #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_s_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_s_cp_hlg_from_coo +end subroutine psb_s_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_s_cp_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_s_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 index 41c20866..b49be761 100644 --- a/cuda/impl/psb_s_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_cp_hlg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_fmt + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_fmt #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_s_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_hlg_from_fmt +end subroutine psb_s_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_s_cp_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_cp_hybg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 index 92dc4a68..ab135944 100644 --- a/cuda/impl/psb_s_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_cp_hybg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_coo + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_coo #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_s_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_hybg_from_coo +end subroutine psb_s_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_s_cp_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 index 53143776..62a54759 100644 --- a/cuda/impl/psb_s_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_cp_hybg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_fmt + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_fmt #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_s_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_s_cp_hybg_from_fmt +end subroutine psb_s_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_s_csrg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_s_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 index e93452d2..53ca8f12 100644 --- a/cuda/impl/psb_s_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_allocate_mnnz + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_allocate_mnnz #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_csrg_allocate_mnnz +end subroutine psb_s_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_s_csrg_csmm.F90 b/cuda/impl/psb_s_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_s_csrg_csmm.F90 rename to cuda/impl/psb_s_cuda_csrg_csmm.F90 index 55087053..c8ff4a9e 100644 --- a/cuda/impl/psb_s_csrg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_csmm + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmm #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_csrg_csmm +end subroutine psb_s_cuda_csrg_csmm diff --git a/cuda/impl/psb_s_csrg_csmv.F90 b/cuda/impl/psb_s_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_s_csrg_csmv.F90 rename to cuda/impl/psb_s_cuda_csrg_csmv.F90 index 42520843..72658c28 100644 --- a/cuda/impl/psb_s_csrg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_csmv + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmv #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_csrg_csmv' + character(len=20) :: name='s_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_csrg_csmv +end subroutine psb_s_cuda_csrg_csmv diff --git a/cuda/impl/psb_z_csrg_from_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_csrg_from_gpu.F90 rename to cuda/impl/psb_s_cuda_csrg_from_gpu.F90 index f8a6951a..7811f746 100644 --- a/cuda/impl/psb_z_csrg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_csrg_from_gpu(a,info) +subroutine psb_s_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_from_gpu + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_from_gpu #else - use psb_z_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_z_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_z_csrg_from_gpu +end subroutine psb_s_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_s_csrg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 index 133a6350..7f9965d8 100644 --- a/cuda/impl/psb_s_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_inner_vect_sv + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_inner_vect_sv #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='s_csrg_inner_vect_sv' + character(len=20) :: name='s_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_csrg_inner_vect_sv +end subroutine psb_s_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_s_csrg_mold.F90 b/cuda/impl/psb_s_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_s_csrg_mold.F90 rename to cuda/impl/psb_s_cuda_csrg_mold.F90 index 6ac4cc3d..5e33850b 100644 --- a/cuda/impl/psb_s_csrg_mold.F90 +++ b/cuda/impl/psb_s_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_csrg_mold(a,b,info) +subroutine psb_s_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_mold + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_mold implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_csrg_mold(a,b,info) return -end subroutine psb_s_csrg_mold +end subroutine psb_s_cuda_csrg_mold diff --git a/cuda/impl/psb_s_csrg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_s_csrg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 index dd9a50d0..fed3b0e7 100644 --- a/cuda/impl/psb_s_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_s_csrg_reallocate_nz(nz,a) +subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_reallocate_nz + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_reallocate_nz #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_csrg_reallocate_nz' + character(len=20) :: name='s_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_s_csrg_reallocate_nz(nz,a) return -end subroutine psb_s_csrg_reallocate_nz +end subroutine psb_s_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_s_csrg_scal.F90 b/cuda/impl/psb_s_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_scal.F90 rename to cuda/impl/psb_s_cuda_csrg_scal.F90 index 5e0fbcf0..826ab2dd 100644 --- a/cuda/impl/psb_s_csrg_scal.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_s_csrg_scal(d,a,info,side) +subroutine psb_s_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scal + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scal #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_s_csrg_scal(d,a,info,side) return -end subroutine psb_s_csrg_scal +end subroutine psb_s_cuda_csrg_scal diff --git a/cuda/impl/psb_s_csrg_scals.F90 b/cuda/impl/psb_s_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_scals.F90 rename to cuda/impl/psb_s_cuda_csrg_scals.F90 index 54b299a1..04f4c29a 100644 --- a/cuda/impl/psb_s_csrg_scals.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_s_csrg_scals(d,a,info) +subroutine psb_s_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scals + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scals #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_s_csrg_scals(d,a,info) return -end subroutine psb_s_csrg_scals +end subroutine psb_s_cuda_csrg_scals diff --git a/cuda/impl/psb_s_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_s_csrg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index f90ae4ea..eadca5df 100644 --- a/cuda/impl/psb_s_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_s_csrg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_to_gpu + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_to_gpu #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_s_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_s_csrg_to_gpu +end subroutine psb_s_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_s_csrg_vect_mv.F90 b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_csrg_vect_mv.F90 index ff88bf89..38e2dfc0 100644 --- a/cuda/impl/psb_s_csrg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_vect_mv + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_vect_mv #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_csrg_vect_mv' + character(len=20) :: name='s_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= szero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_csrg_vect_mv +end subroutine psb_s_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_s_diag_csmv.F90 b/cuda/impl/psb_s_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_s_diag_csmv.F90 rename to cuda/impl/psb_s_cuda_diag_csmv.F90 index 4cf14d12..214cf6f8 100644 --- a/cuda/impl/psb_s_diag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_csmv + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_csmv #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='s_diag_csmv' + character(len=20) :: name='s_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_diag_csmv +end subroutine psb_s_cuda_diag_csmv diff --git a/cuda/impl/psb_s_diag_mold.F90 b/cuda/impl/psb_s_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_s_diag_mold.F90 rename to cuda/impl/psb_s_cuda_diag_mold.F90 index a7690f62..9e6c58a6 100644 --- a/cuda/impl/psb_s_diag_mold.F90 +++ b/cuda/impl/psb_s_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_diag_mold(a,b,info) +subroutine psb_s_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_mold + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_mold implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_diag_mold(a,b,info) return -end subroutine psb_s_diag_mold +end subroutine psb_s_cuda_diag_mold diff --git a/cuda/impl/psb_s_diag_to_gpu.F90 b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_diag_to_gpu.F90 rename to cuda/impl/psb_s_cuda_diag_to_gpu.F90 index bb09b127..c1ee7401 100644 --- a/cuda/impl/psb_s_diag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_diag_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_to_gpu + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_to_gpu #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_s_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_s_diag_to_gpu +end subroutine psb_s_cuda_diag_to_gpu diff --git a/cuda/impl/psb_s_diag_vect_mv.F90 b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_diag_vect_mv.F90 rename to cuda/impl/psb_s_cuda_diag_vect_mv.F90 index 31976247..ab655b7c 100644 --- a/cuda/impl/psb_s_diag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_vect_mv + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_vect_mv #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_diag_vect_mv' + character(len=20) :: name='s_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_diag_vect_mv +end subroutine psb_s_cuda_diag_vect_mv diff --git a/cuda/impl/psb_s_dnsg_mat_impl.F90 b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_s_dnsg_mat_impl.F90 rename to cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 index 13c58985..861724aa 100644 --- a/cuda/impl/psb_s_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_vect_mv + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_vect_mv #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ real(psb_spk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='s_dnsg_vect_mv' + character(len=20) :: name='s_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= szero) then @@ -117,21 +117,21 @@ subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_dnsg_vect_mv +end subroutine psb_s_cuda_dnsg_vect_mv -subroutine psb_s_dnsg_mold(a,b,info) +subroutine psb_s_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_mold + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_mold #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_s_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_s_dnsg_mold(a,b,info) return -end subroutine psb_s_dnsg_mold +end subroutine psb_s_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_s_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end subroutine psb_s_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_s_dnsg_to_gpu(a,info) +subroutine psb_s_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_to_gpu + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_to_gpu #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='s_dnsg_to_gpu' + character(len=20) :: name='s_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_s_dnsg_to_gpu(a,info) return -end subroutine psb_s_dnsg_to_gpu +end subroutine psb_s_cuda_dnsg_to_gpu -subroutine psb_s_cp_dnsg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_coo + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_coo #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_dnsg_cp_from_coo' + character(len=20) :: name='s_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_s_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_s_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_s_cp_dnsg_from_coo +end subroutine psb_s_cuda_cp_dnsg_from_coo -subroutine psb_s_cp_dnsg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_fmt + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_fmt #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_s_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_dnsg_cp_from_fmt' + character(len=20) :: name='s_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_s_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_s_cp_dnsg_from_fmt +end subroutine psb_s_cuda_cp_dnsg_from_fmt -subroutine psb_s_mv_dnsg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_coo + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_coo #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='s_dnsg_mv_from_coo' + character(len=20) :: name='s_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_s_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_s_mv_dnsg_from_coo +end subroutine psb_s_cuda_mv_dnsg_from_coo -subroutine psb_s_mv_dnsg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_fmt + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_fmt #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_s_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_dnsg_cp_from_fmt' + character(len=20) :: name='s_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_s_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_s_mv_dnsg_from_fmt +end subroutine psb_s_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_s_elg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_s_elg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 index f3b1d743..63c41644 100644 --- a/cuda/impl/psb_s_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_allocate_mnnz + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_allocate_mnnz #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_elg_allocate_mnnz +end subroutine psb_s_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_c_elg_asb.f90 b/cuda/impl/psb_s_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_c_elg_asb.f90 rename to cuda/impl/psb_s_cuda_elg_asb.f90 index f2a8c641..0d53c26a 100644 --- a/cuda/impl/psb_c_elg_asb.f90 +++ b/cuda/impl/psb_s_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_c_elg_asb(a) +subroutine psb_s_cuda_elg_asb(a) use psb_base_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_asb + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_asb implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_c_elg_asb(a) return -end subroutine psb_c_elg_asb +end subroutine psb_s_cuda_elg_asb diff --git a/cuda/impl/psb_s_elg_csmm.F90 b/cuda/impl/psb_s_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_s_elg_csmm.F90 rename to cuda/impl/psb_s_cuda_elg_csmm.F90 index 8bda23e3..e7f88a2e 100644 --- a/cuda/impl/psb_s_elg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csmm + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmm #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_elg_csmm' + character(len=20) :: name='s_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_csmm +end subroutine psb_s_cuda_elg_csmm diff --git a/cuda/impl/psb_s_elg_csmv.F90 b/cuda/impl/psb_s_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_s_elg_csmv.F90 rename to cuda/impl/psb_s_cuda_elg_csmv.F90 index 29e345c2..1844d338 100644 --- a/cuda/impl/psb_s_elg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csmv + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmv #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_csmv +end subroutine psb_s_cuda_elg_csmv diff --git a/cuda/impl/psb_s_elg_csput.F90 b/cuda/impl/psb_s_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_s_elg_csput.F90 rename to cuda/impl/psb_s_cuda_elg_csput.F90 index ff2b0ff3..036eabb2 100644 --- a/cuda/impl/psb_s_elg_csput.F90 +++ b/cuda/impl/psb_s_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csput_a + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_a #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='s_elg_csput_a' + character(len=20) :: name='s_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_s_elg_csput_a +end subroutine psb_s_cuda_elg_csput_a -subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csput_v - use psb_s_gpu_vect_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_v + use psb_s_cuda_vect_mod #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='s_elg_csput_v' + character(len=20) :: name='s_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_s_vect_gpu) + class is (psb_s_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_s_elg_csput_v +end subroutine psb_s_cuda_elg_csput_v diff --git a/cuda/impl/psb_c_elg_from_gpu.F90 b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_elg_from_gpu.F90 rename to cuda/impl/psb_s_cuda_elg_from_gpu.F90 index eda65380..bdc55790 100644 --- a/cuda/impl/psb_c_elg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_from_gpu(a,info) +subroutine psb_s_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_from_gpu + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_from_gpu #else - use psb_c_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_c_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_c_elg_from_gpu +end subroutine psb_s_cuda_elg_from_gpu diff --git a/cuda/impl/psb_s_elg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_s_elg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 index 83c79cf3..79e546f5 100644 --- a/cuda/impl/psb_s_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_inner_vect_sv + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_inner_vect_sv #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='s_elg_inner_vect_sv' + character(len=20) :: name='s_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. real(psb_spk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_inner_vect_sv +end subroutine psb_s_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_s_elg_mold.F90 b/cuda/impl/psb_s_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_s_elg_mold.F90 rename to cuda/impl/psb_s_cuda_elg_mold.F90 index a481d605..dc8730bb 100644 --- a/cuda/impl/psb_s_elg_mold.F90 +++ b/cuda/impl/psb_s_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_elg_mold(a,b,info) +subroutine psb_s_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_mold + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_mold implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_elg_mold(a,b,info) return -end subroutine psb_s_elg_mold +end subroutine psb_s_cuda_elg_mold diff --git a/cuda/impl/psb_s_elg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_s_elg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 index 22916852..3f34fcec 100644 --- a/cuda/impl/psb_s_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_s_elg_reallocate_nz(nz,a) +subroutine psb_s_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_reallocate_nz + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_reallocate_nz #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_elg_reallocate_nz' + character(len=20) :: name='s_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_s_elg_reallocate_nz(nz,a) return -end subroutine psb_s_elg_reallocate_nz +end subroutine psb_s_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_s_elg_scal.F90 b/cuda/impl/psb_s_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_s_elg_scal.F90 rename to cuda/impl/psb_s_cuda_elg_scal.F90 index 913ae47e..cd6e1a5b 100644 --- a/cuda/impl/psb_s_elg_scal.F90 +++ b/cuda/impl/psb_s_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_scal(d,a,info,side) +subroutine psb_s_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scal + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scal #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_s_elg_scal(d,a,info,side) return -end subroutine psb_s_elg_scal +end subroutine psb_s_cuda_elg_scal diff --git a/cuda/impl/psb_s_elg_scals.F90 b/cuda/impl/psb_s_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_s_elg_scals.F90 rename to cuda/impl/psb_s_cuda_elg_scals.F90 index 8261fc94..4ee8a64d 100644 --- a/cuda/impl/psb_s_elg_scals.F90 +++ b/cuda/impl/psb_s_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_scals(d,a,info) +subroutine psb_s_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scals + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scals #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_s_elg_scals(d,a,info) return -end subroutine psb_s_elg_scals +end subroutine psb_s_cuda_elg_scals diff --git a/cuda/impl/psb_s_elg_to_gpu.F90 b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_s_elg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_elg_to_gpu.F90 index bf86343b..7d04d2b0 100644 --- a/cuda/impl/psb_s_elg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_to_gpu + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_to_gpu #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_s_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_s_elg_to_gpu +end subroutine psb_s_cuda_elg_to_gpu diff --git a/cuda/impl/psb_z_elg_trim.f90 b/cuda/impl/psb_s_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_z_elg_trim.f90 rename to cuda/impl/psb_s_cuda_elg_trim.f90 index 9bd43312..516aebc4 100644 --- a/cuda/impl/psb_z_elg_trim.f90 +++ b/cuda/impl/psb_s_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_elg_trim(a) +subroutine psb_s_cuda_elg_trim(a) use psb_base_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_trim + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_trim implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_z_elg_trim(a) return -end subroutine psb_z_elg_trim +end subroutine psb_s_cuda_elg_trim diff --git a/cuda/impl/psb_s_elg_vect_mv.F90 b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_s_elg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_elg_vect_mv.F90 index f8d297d1..dad62418 100644 --- a/cuda/impl/psb_s_elg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_vect_mv + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_vect_mv #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_elg_vect_mv' + character(len=20) :: name='s_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= szero) then @@ -128,4 +128,4 @@ subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_vect_mv +end subroutine psb_s_cuda_elg_vect_mv diff --git a/cuda/impl/psb_s_hdiag_csmv.F90 b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_s_hdiag_csmv.F90 rename to cuda/impl/psb_s_cuda_hdiag_csmv.F90 index 3320901c..8e7e4931 100644 --- a/cuda/impl/psb_s_hdiag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_csmv + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_csmv #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif implicit none - class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='s_hdiag_csmv' + character(len=20) :: name='s_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hdiag_csmv +end subroutine psb_s_cuda_hdiag_csmv diff --git a/cuda/impl/psb_s_hdiag_mold.F90 b/cuda/impl/psb_s_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_s_hdiag_mold.F90 rename to cuda/impl/psb_s_cuda_hdiag_mold.F90 index 1486b17e..e662b07b 100644 --- a/cuda/impl/psb_s_hdiag_mold.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_hdiag_mold(a,b,info) +subroutine psb_s_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_mold + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_mold implicit none - class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_hdiag_mold(a,b,info) return -end subroutine psb_s_hdiag_mold +end subroutine psb_s_cuda_hdiag_mold diff --git a/cuda/impl/psb_z_hdiag_to_gpu.F90 b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_z_hdiag_to_gpu.F90 rename to cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 index 622a0141..5fe493aa 100644 --- a/cuda/impl/psb_z_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hdiag_to_gpu(a,info) +subroutine psb_s_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_to_gpu + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_to_gpu #else - use psb_z_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_z_hdiag_to_gpu(a,info) #endif -end subroutine psb_z_hdiag_to_gpu +end subroutine psb_s_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_s_hdiag_vect_mv.F90 b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_hdiag_vect_mv.F90 rename to cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 index ac261e92..3496a637 100644 --- a/cuda/impl/psb_s_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_vect_mv + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_vect_mv #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hdiag_vect_mv' + character(len=20) :: name='s_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hdiag_vect_mv +end subroutine psb_s_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_s_hlg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_s_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 index c7e430f1..3f2765c4 100644 --- a/cuda/impl/psb_s_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_allocate_mnnz + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_allocate_mnnz #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_hlg_allocate_mnnz +end subroutine psb_s_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_s_hlg_csmm.F90 b/cuda/impl/psb_s_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_s_hlg_csmm.F90 rename to cuda/impl/psb_s_cuda_hlg_csmm.F90 index 126b17e6..2e274c22 100644 --- a/cuda/impl/psb_s_hlg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_csmm + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmm #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hlg_csmm' + character(len=20) :: name='s_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_csmm +end subroutine psb_s_cuda_hlg_csmm diff --git a/cuda/impl/psb_s_hlg_csmv.F90 b/cuda/impl/psb_s_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_s_hlg_csmv.F90 rename to cuda/impl/psb_s_cuda_hlg_csmv.F90 index 2a7f5a4d..56ea8cdb 100644 --- a/cuda/impl/psb_s_hlg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_csmv + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmv #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='s_hlg_csmv' + character(len=20) :: name='s_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_csmv +end subroutine psb_s_cuda_hlg_csmv diff --git a/cuda/impl/psb_c_hlg_from_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_c_hlg_from_gpu.F90 rename to cuda/impl/psb_s_cuda_hlg_from_gpu.F90 index 85f337ff..14ab19b7 100644 --- a/cuda/impl/psb_c_hlg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_from_gpu(a,info) +subroutine psb_s_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_from_gpu + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_from_gpu #else - use psb_c_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_c_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_c_hlg_from_gpu +end subroutine psb_s_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_s_hlg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_s_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 index d545eb02..a9f4f743 100644 --- a/cuda/impl/psb_s_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_inner_vect_sv + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_inner_vect_sv #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_inner_vect_sv +end subroutine psb_s_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_s_hlg_mold.F90 b/cuda/impl/psb_s_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_s_hlg_mold.F90 rename to cuda/impl/psb_s_cuda_hlg_mold.F90 index c5dc4774..90e9cebf 100644 --- a/cuda/impl/psb_s_hlg_mold.F90 +++ b/cuda/impl/psb_s_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_hlg_mold(a,b,info) +subroutine psb_s_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_mold + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_mold implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_s_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_hlg_mold +end subroutine psb_s_cuda_hlg_mold diff --git a/cuda/impl/psb_s_hlg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_s_hlg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 index 19cd95df..d5b9333c 100644 --- a/cuda/impl/psb_s_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_s_hlg_reallocate_nz(nz,a) +subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_reallocate_nz + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_reallocate_nz #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_hlg_reallocate_nz' + character(len=20) :: name='s_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_s_hlg_reallocate_nz(nz,a) return -end subroutine psb_s_hlg_reallocate_nz +end subroutine psb_s_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_s_hlg_scal.F90 b/cuda/impl/psb_s_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_scal.F90 rename to cuda/impl/psb_s_cuda_hlg_scal.F90 index cd389baa..e803a63d 100644 --- a/cuda/impl/psb_s_hlg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_scal(d,a,info,side) +subroutine psb_s_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scal + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scal #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_s_hlg_scal(d,a,info,side) return -end subroutine psb_s_hlg_scal +end subroutine psb_s_cuda_hlg_scal diff --git a/cuda/impl/psb_s_hlg_scals.F90 b/cuda/impl/psb_s_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_scals.F90 rename to cuda/impl/psb_s_cuda_hlg_scals.F90 index 256fac3e..eec592e1 100644 --- a/cuda/impl/psb_s_hlg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_scals(d,a,info) +subroutine psb_s_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scals + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scals #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_s_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_hlg_scals +end subroutine psb_s_cuda_hlg_scals diff --git a/cuda/impl/psb_s_hlg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_hlg_to_gpu.F90 index 139482c2..14a2a629 100644 --- a/cuda/impl/psb_s_hlg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_to_gpu + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_to_gpu #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_s_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_s_hlg_to_gpu +end subroutine psb_s_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_s_hlg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_hlg_vect_mv.F90 index 52f322aa..2b964f91 100644 --- a/cuda/impl/psb_s_hlg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_vect_mv + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_vect_mv #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hlg_vect_mv' + character(len=20) :: name='s_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_vect_mv +end subroutine psb_s_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_s_hybg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_s_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 index f2b79c77..0cf1e2bc 100644 --- a/cuda/impl/psb_s_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_allocate_mnnz + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_allocate_mnnz #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_hybg_allocate_mnnz +end subroutine psb_s_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_s_hybg_csmm.F90 b/cuda/impl/psb_s_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_s_hybg_csmm.F90 rename to cuda/impl/psb_s_cuda_hybg_csmm.F90 index 9de67633..f89df384 100644 --- a/cuda/impl/psb_s_hybg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_csmm + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmm #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_csmm' + character(len=20) :: name='s_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_csmm +end subroutine psb_s_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_s_hybg_csmv.F90 b/cuda/impl/psb_s_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_s_hybg_csmv.F90 rename to cuda/impl/psb_s_cuda_hybg_csmv.F90 index d20740a6..01642146 100644 --- a/cuda/impl/psb_s_hybg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_csmv + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmv #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_csmv' + character(len=20) :: name='s_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_csmv +end subroutine psb_s_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_s_hybg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_s_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 index 95920fc9..f0006f5c 100644 --- a/cuda/impl/psb_s_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_inner_vect_sv + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_inner_vect_sv #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_inner_vect_sv' + character(len=20) :: name='s_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= szero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_inner_vect_sv +end subroutine psb_s_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_s_hybg_mold.F90 b/cuda/impl/psb_s_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_s_hybg_mold.F90 rename to cuda/impl/psb_s_cuda_hybg_mold.F90 index 882990c0..4a1fc64e 100644 --- a/cuda/impl/psb_s_hybg_mold.F90 +++ b/cuda/impl/psb_s_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_mold(a,b,info) +subroutine psb_s_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_mold + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_mold implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_s_hybg_mold(a,b,info) return -end subroutine psb_s_hybg_mold +end subroutine psb_s_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_s_hybg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_s_hybg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 index 46079a92..7ee15f52 100644 --- a/cuda/impl/psb_s_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_reallocate_nz(nz,a) +subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_reallocate_nz + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_reallocate_nz #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_hybg_reallocate_nz' + character(len=20) :: name='s_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_s_hybg_reallocate_nz(nz,a) return -end subroutine psb_s_hybg_reallocate_nz +end subroutine psb_s_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_s_hybg_scal.F90 b/cuda/impl/psb_s_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_s_hybg_scal.F90 rename to cuda/impl/psb_s_cuda_hybg_scal.F90 index a55a8b2c..7a3978b7 100644 --- a/cuda/impl/psb_s_hybg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_scal(d,a,info,side) +subroutine psb_s_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scal + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scal #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_s_hybg_scal(d,a,info,side) return -end subroutine psb_s_hybg_scal +end subroutine psb_s_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_s_hybg_scals.F90 b/cuda/impl/psb_s_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_s_hybg_scals.F90 rename to cuda/impl/psb_s_cuda_hybg_scals.F90 index ae92166f..a19ae3f6 100644 --- a/cuda/impl/psb_s_hybg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_scals(d,a,info) +subroutine psb_s_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scals + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scals #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_s_hybg_scals(d,a,info) return -end subroutine psb_s_hybg_scals +end subroutine psb_s_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_s_hybg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_s_hybg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_hybg_to_gpu.F90 index bfb9b261..ec415176 100644 --- a/cuda/impl/psb_s_hybg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_to_gpu + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_to_gpu #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_s_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_s_hybg_to_gpu +end subroutine psb_s_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_s_hybg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_hybg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_hybg_vect_mv.F90 index 5fe102f6..a83c4561 100644 --- a/cuda/impl/psb_s_hybg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_vect_mv + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_vect_mv #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_vect_mv' + character(len=20) :: name='s_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= szero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_vect_mv +end subroutine psb_s_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_s_mv_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_csrg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 index 01c9db06..a9e297bd 100644 --- a/cuda/impl/psb_s_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_mv_csrg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_coo + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_coo #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_s_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_csrg_from_coo +end subroutine psb_s_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_s_mv_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 index 0ac28af3..54bc0ae4 100644 --- a/cuda/impl/psb_s_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_mv_csrg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_fmt + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_fmt #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_s_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_s_mv_csrg_from_fmt +end subroutine psb_s_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_s_mv_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_diag_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 index f51607e5..fda60d96 100644 --- a/cuda/impl/psb_s_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_mv_diag_from_coo(a,b,info) +subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_mv_diag_from_coo + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_mv_diag_from_coo #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif implicit none - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_s_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_diag_from_coo +end subroutine psb_s_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_s_mv_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_elg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 index ac153f6c..447e2971 100644 --- a/cuda/impl/psb_s_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_mv_elg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_coo + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_coo #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_s_mv_elg_from_coo(a,b,info) return -end subroutine psb_s_mv_elg_from_coo +end subroutine psb_s_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_s_mv_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_s_mv_elg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 index 9238544c..e88080dd 100644 --- a/cuda/impl/psb_s_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_mv_elg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_fmt + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_fmt #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_s_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_s_mv_elg_from_fmt +end subroutine psb_s_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_s_mv_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_s_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 index dcbcfe4d..f3252eb2 100644 --- a/cuda/impl/psb_s_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_s_mv_hdiag_from_coo(a,b,info) +subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif implicit none - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_s_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_s_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_s_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_hdiag_from_coo +end subroutine psb_s_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_s_mv_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_s_mv_hlg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 index dc72a135..9810a85e 100644 --- a/cuda/impl/psb_s_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_mv_hlg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_coo #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_s_mv_hlg_from_coo(a,b,info) return -end subroutine psb_s_mv_hlg_from_coo +end subroutine psb_s_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_s_mv_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 index bbe42e4a..700dc151 100644 --- a/cuda/impl/psb_s_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_mv_hlg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_fmt + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_fmt #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_s_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_s_mv_hlg_from_fmt +end subroutine psb_s_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_s_mv_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_hybg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 index 7d3197a8..ca9f34c1 100644 --- a/cuda/impl/psb_s_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_mv_hybg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_coo + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_coo #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_s_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_hybg_from_coo +end subroutine psb_s_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_s_mv_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 index 51d8a2e6..5ba606af 100644 --- a/cuda/impl/psb_s_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_mv_hybg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_fmt + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_fmt #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_s_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_s_mv_hybg_from_fmt +end subroutine psb_s_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_z_cp_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_cp_csrg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 index c3b0eebd..186190ac 100644 --- a/cuda/impl/psb_z_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_cp_csrg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_coo + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_coo #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_z_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_csrg_from_coo +end subroutine psb_z_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_z_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 index 218d6c7b..d1e1a82d 100644 --- a/cuda/impl/psb_z_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_cp_csrg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_fmt + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_fmt #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_z_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_z_cp_csrg_from_fmt +end subroutine psb_z_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_z_cp_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_cp_diag_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 index 013e88cd..c303b127 100644 --- a/cuda/impl/psb_z_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_cp_diag_from_coo(a,b,info) +subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_cp_diag_from_coo + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_cp_diag_from_coo #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif implicit none - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_z_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_diag_from_coo +end subroutine psb_z_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_z_cp_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_z_cp_elg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 index c9b61a99..4b18b89b 100644 --- a/cuda/impl/psb_z_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_z_cp_elg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_coo + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_z_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_z_count_ell_from_coo -end subroutine psb_z_cp_elg_from_coo +end subroutine psb_z_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_z_cp_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_z_cp_elg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 index 23468b8a..6fa91de6 100644 --- a/cuda/impl/psb_z_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_cp_elg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_fmt + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_fmt #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_z_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_z_cp_elg_from_fmt +end subroutine psb_z_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_z_cp_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_z_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 index b44c2854..c94d8824 100644 --- a/cuda/impl/psb_z_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_cp_hdiag_from_coo(a,b,info) +subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif implicit none - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_z_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_z_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_z_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_hdiag_from_coo +end subroutine psb_z_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_z_cp_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_z_cp_hlg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 index 51d0c8e6..1607f1b6 100644 --- a/cuda/impl/psb_z_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_cp_hlg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_coo #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_z_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_z_cp_hlg_from_coo +end subroutine psb_z_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_z_cp_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_z_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 index a6dd5970..e8c1f95d 100644 --- a/cuda/impl/psb_z_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_cp_hlg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_fmt + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_fmt #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_z_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_hlg_from_fmt +end subroutine psb_z_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_z_cp_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_cp_hybg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 index ebb6f60a..6031526a 100644 --- a/cuda/impl/psb_z_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_cp_hybg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_coo + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_coo #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_z_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_hybg_from_coo +end subroutine psb_z_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_z_cp_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 index 82f2ac65..0202ef24 100644 --- a/cuda/impl/psb_z_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_cp_hybg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_fmt + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_fmt #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_z_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_z_cp_hybg_from_fmt +end subroutine psb_z_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_z_csrg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_z_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 index 8cb2ccb1..a7533e59 100644 --- a/cuda/impl/psb_z_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_allocate_mnnz + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_allocate_mnnz #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_csrg_allocate_mnnz +end subroutine psb_z_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_z_csrg_csmm.F90 b/cuda/impl/psb_z_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_z_csrg_csmm.F90 rename to cuda/impl/psb_z_cuda_csrg_csmm.F90 index eb8a4d7f..49fb9fcf 100644 --- a/cuda/impl/psb_z_csrg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_csmm + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmm #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_csrg_csmm +end subroutine psb_z_cuda_csrg_csmm diff --git a/cuda/impl/psb_z_csrg_csmv.F90 b/cuda/impl/psb_z_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_z_csrg_csmv.F90 rename to cuda/impl/psb_z_cuda_csrg_csmv.F90 index 10546eb1..54ad6f4f 100644 --- a/cuda/impl/psb_z_csrg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_csmv + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmv #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_csrg_csmv' + character(len=20) :: name='z_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_csrg_csmv +end subroutine psb_z_cuda_csrg_csmv diff --git a/cuda/impl/psb_d_csrg_from_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_csrg_from_gpu.F90 rename to cuda/impl/psb_z_cuda_csrg_from_gpu.F90 index 9c0237f0..bb3b49d5 100644 --- a/cuda/impl/psb_d_csrg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_csrg_from_gpu(a,info) +subroutine psb_z_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_from_gpu + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_from_gpu #else - use psb_d_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_d_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_d_csrg_from_gpu +end subroutine psb_z_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_z_csrg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 index 75d6800b..6c7b1fcb 100644 --- a/cuda/impl/psb_z_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_inner_vect_sv + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_inner_vect_sv #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='z_csrg_inner_vect_sv' + character(len=20) :: name='z_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_csrg_inner_vect_sv +end subroutine psb_z_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_z_csrg_mold.F90 b/cuda/impl/psb_z_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_z_csrg_mold.F90 rename to cuda/impl/psb_z_cuda_csrg_mold.F90 index e83deb3f..23bb658a 100644 --- a/cuda/impl/psb_z_csrg_mold.F90 +++ b/cuda/impl/psb_z_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_csrg_mold(a,b,info) +subroutine psb_z_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_mold + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_mold implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_csrg_mold(a,b,info) return -end subroutine psb_z_csrg_mold +end subroutine psb_z_cuda_csrg_mold diff --git a/cuda/impl/psb_z_csrg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_z_csrg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 index c2509c22..61ae0f59 100644 --- a/cuda/impl/psb_z_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_z_csrg_reallocate_nz(nz,a) +subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_reallocate_nz + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_reallocate_nz #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_csrg_reallocate_nz' + character(len=20) :: name='z_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_z_csrg_reallocate_nz(nz,a) return -end subroutine psb_z_csrg_reallocate_nz +end subroutine psb_z_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_z_csrg_scal.F90 b/cuda/impl/psb_z_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_scal.F90 rename to cuda/impl/psb_z_cuda_csrg_scal.F90 index d8ab0ca3..a2099933 100644 --- a/cuda/impl/psb_z_csrg_scal.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_z_csrg_scal(d,a,info,side) +subroutine psb_z_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scal + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scal #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_z_csrg_scal(d,a,info,side) return -end subroutine psb_z_csrg_scal +end subroutine psb_z_cuda_csrg_scal diff --git a/cuda/impl/psb_z_csrg_scals.F90 b/cuda/impl/psb_z_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_scals.F90 rename to cuda/impl/psb_z_cuda_csrg_scals.F90 index 3d14998d..72fee99b 100644 --- a/cuda/impl/psb_z_csrg_scals.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_z_csrg_scals(d,a,info) +subroutine psb_z_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scals + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scals #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_z_csrg_scals(d,a,info) return -end subroutine psb_z_csrg_scals +end subroutine psb_z_cuda_csrg_scals diff --git a/cuda/impl/psb_z_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_z_csrg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index 4548935d..c6f217ab 100644 --- a/cuda/impl/psb_z_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_z_csrg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_to_gpu + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_to_gpu #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_z_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_z_csrg_to_gpu +end subroutine psb_z_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_z_csrg_vect_mv.F90 b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_csrg_vect_mv.F90 index 0770d448..964134fb 100644 --- a/cuda/impl/psb_z_csrg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_vect_mv + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_vect_mv #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_csrg_vect_mv' + character(len=20) :: name='z_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= zzero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_csrg_vect_mv +end subroutine psb_z_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_z_diag_csmv.F90 b/cuda/impl/psb_z_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_z_diag_csmv.F90 rename to cuda/impl/psb_z_cuda_diag_csmv.F90 index 667e1a1f..2e86f0f8 100644 --- a/cuda/impl/psb_z_diag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_csmv + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_csmv #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='z_diag_csmv' + character(len=20) :: name='z_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_diag_csmv +end subroutine psb_z_cuda_diag_csmv diff --git a/cuda/impl/psb_z_diag_mold.F90 b/cuda/impl/psb_z_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_z_diag_mold.F90 rename to cuda/impl/psb_z_cuda_diag_mold.F90 index 5cd752ce..5b11b41c 100644 --- a/cuda/impl/psb_z_diag_mold.F90 +++ b/cuda/impl/psb_z_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_diag_mold(a,b,info) +subroutine psb_z_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_mold + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_mold implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_diag_mold(a,b,info) return -end subroutine psb_z_diag_mold +end subroutine psb_z_cuda_diag_mold diff --git a/cuda/impl/psb_z_diag_to_gpu.F90 b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_diag_to_gpu.F90 rename to cuda/impl/psb_z_cuda_diag_to_gpu.F90 index 40913624..a28858b5 100644 --- a/cuda/impl/psb_z_diag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_diag_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_to_gpu + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_to_gpu #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_z_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_z_diag_to_gpu +end subroutine psb_z_cuda_diag_to_gpu diff --git a/cuda/impl/psb_z_diag_vect_mv.F90 b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_z_diag_vect_mv.F90 rename to cuda/impl/psb_z_cuda_diag_vect_mv.F90 index b8946491..12f3c3e7 100644 --- a/cuda/impl/psb_z_diag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_vect_mv + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_vect_mv #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_diag_vect_mv' + character(len=20) :: name='z_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_diag_vect_mv +end subroutine psb_z_cuda_diag_vect_mv diff --git a/cuda/impl/psb_z_dnsg_mat_impl.F90 b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_z_dnsg_mat_impl.F90 rename to cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 index 407deaa2..c2a641b6 100644 --- a/cuda/impl/psb_z_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_vect_mv + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_vect_mv #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ complex(psb_dpk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='z_dnsg_vect_mv' + character(len=20) :: name='z_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= zzero) then @@ -117,21 +117,21 @@ subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_dnsg_vect_mv +end subroutine psb_z_cuda_dnsg_vect_mv -subroutine psb_z_dnsg_mold(a,b,info) +subroutine psb_z_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_mold + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_mold #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_z_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_z_dnsg_mold(a,b,info) return -end subroutine psb_z_dnsg_mold +end subroutine psb_z_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_z_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_z_dnsg_to_gpu(a,info) +subroutine psb_z_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_to_gpu + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_to_gpu #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='z_dnsg_to_gpu' + character(len=20) :: name='z_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_z_dnsg_to_gpu(a,info) return -end subroutine psb_z_dnsg_to_gpu +end subroutine psb_z_cuda_dnsg_to_gpu -subroutine psb_z_cp_dnsg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_coo + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_coo #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_dnsg_cp_from_coo' + character(len=20) :: name='z_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_z_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_z_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_z_cp_dnsg_from_coo +end subroutine psb_z_cuda_cp_dnsg_from_coo -subroutine psb_z_cp_dnsg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_fmt + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_fmt #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_z_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_dnsg_cp_from_fmt' + character(len=20) :: name='z_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_z_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_z_cp_dnsg_from_fmt +end subroutine psb_z_cuda_cp_dnsg_from_fmt -subroutine psb_z_mv_dnsg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_coo + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_coo #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='z_dnsg_mv_from_coo' + character(len=20) :: name='z_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_z_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_z_mv_dnsg_from_coo +end subroutine psb_z_cuda_mv_dnsg_from_coo -subroutine psb_z_mv_dnsg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_fmt + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_fmt #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_z_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_dnsg_cp_from_fmt' + character(len=20) :: name='z_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_z_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_z_mv_dnsg_from_fmt +end subroutine psb_z_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_z_elg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_z_elg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 index 39d14dd2..a2e36feb 100644 --- a/cuda/impl/psb_z_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_allocate_mnnz + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_allocate_mnnz #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_elg_allocate_mnnz +end subroutine psb_z_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_z_elg_asb.f90 b/cuda/impl/psb_z_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_z_elg_asb.f90 rename to cuda/impl/psb_z_cuda_elg_asb.f90 index 515f579a..511183f5 100644 --- a/cuda/impl/psb_z_elg_asb.f90 +++ b/cuda/impl/psb_z_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_z_elg_asb(a) +subroutine psb_z_cuda_elg_asb(a) use psb_base_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_asb + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_asb implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_z_elg_asb(a) return -end subroutine psb_z_elg_asb +end subroutine psb_z_cuda_elg_asb diff --git a/cuda/impl/psb_z_elg_csmm.F90 b/cuda/impl/psb_z_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_z_elg_csmm.F90 rename to cuda/impl/psb_z_cuda_elg_csmm.F90 index aa27419c..d4034b65 100644 --- a/cuda/impl/psb_z_elg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csmm + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmm #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_elg_csmm' + character(len=20) :: name='z_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_csmm +end subroutine psb_z_cuda_elg_csmm diff --git a/cuda/impl/psb_z_elg_csmv.F90 b/cuda/impl/psb_z_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_z_elg_csmv.F90 rename to cuda/impl/psb_z_cuda_elg_csmv.F90 index 46bc9615..eba12d16 100644 --- a/cuda/impl/psb_z_elg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csmv + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmv #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_csmv +end subroutine psb_z_cuda_elg_csmv diff --git a/cuda/impl/psb_z_elg_csput.F90 b/cuda/impl/psb_z_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_z_elg_csput.F90 rename to cuda/impl/psb_z_cuda_elg_csput.F90 index 51080f5b..5a52f4f8 100644 --- a/cuda/impl/psb_z_elg_csput.F90 +++ b/cuda/impl/psb_z_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csput_a + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_a #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='z_elg_csput_a' + character(len=20) :: name='z_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_z_elg_csput_a +end subroutine psb_z_cuda_elg_csput_a -subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csput_v - use psb_z_gpu_vect_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_v + use psb_z_cuda_vect_mod #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='z_elg_csput_v' + character(len=20) :: name='z_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_z_vect_gpu) + class is (psb_z_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_z_elg_csput_v +end subroutine psb_z_cuda_elg_csput_v diff --git a/cuda/impl/psb_z_elg_from_gpu.F90 b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_elg_from_gpu.F90 rename to cuda/impl/psb_z_cuda_elg_from_gpu.F90 index e8670cd4..ffed4349 100644 --- a/cuda/impl/psb_z_elg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_from_gpu(a,info) +subroutine psb_z_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_from_gpu + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_from_gpu #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_z_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_z_elg_from_gpu +end subroutine psb_z_cuda_elg_from_gpu diff --git a/cuda/impl/psb_z_elg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_z_elg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 index 66d7eed8..7564d5dd 100644 --- a/cuda/impl/psb_z_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_inner_vect_sv + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_inner_vect_sv #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='z_elg_inner_vect_sv' + character(len=20) :: name='z_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. complex(psb_dpk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_inner_vect_sv +end subroutine psb_z_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_z_elg_mold.F90 b/cuda/impl/psb_z_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_z_elg_mold.F90 rename to cuda/impl/psb_z_cuda_elg_mold.F90 index 1a5ebe54..e027c9f2 100644 --- a/cuda/impl/psb_z_elg_mold.F90 +++ b/cuda/impl/psb_z_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_elg_mold(a,b,info) +subroutine psb_z_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_mold + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_mold implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_elg_mold(a,b,info) return -end subroutine psb_z_elg_mold +end subroutine psb_z_cuda_elg_mold diff --git a/cuda/impl/psb_z_elg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_z_elg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 index f6bc194f..16cebe70 100644 --- a/cuda/impl/psb_z_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_z_elg_reallocate_nz(nz,a) +subroutine psb_z_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_reallocate_nz + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_reallocate_nz #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_elg_reallocate_nz' + character(len=20) :: name='z_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_z_elg_reallocate_nz(nz,a) return -end subroutine psb_z_elg_reallocate_nz +end subroutine psb_z_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_z_elg_scal.F90 b/cuda/impl/psb_z_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_z_elg_scal.F90 rename to cuda/impl/psb_z_cuda_elg_scal.F90 index eed9007a..4802aaaa 100644 --- a/cuda/impl/psb_z_elg_scal.F90 +++ b/cuda/impl/psb_z_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_scal(d,a,info,side) +subroutine psb_z_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scal + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scal #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_z_elg_scal(d,a,info,side) return -end subroutine psb_z_elg_scal +end subroutine psb_z_cuda_elg_scal diff --git a/cuda/impl/psb_z_elg_scals.F90 b/cuda/impl/psb_z_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_z_elg_scals.F90 rename to cuda/impl/psb_z_cuda_elg_scals.F90 index 1e3f3682..5db823da 100644 --- a/cuda/impl/psb_z_elg_scals.F90 +++ b/cuda/impl/psb_z_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_scals(d,a,info) +subroutine psb_z_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scals + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scals #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_z_elg_scals(d,a,info) return -end subroutine psb_z_elg_scals +end subroutine psb_z_cuda_elg_scals diff --git a/cuda/impl/psb_z_elg_to_gpu.F90 b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_z_elg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_elg_to_gpu.F90 index 71a5ec66..6d86bdd9 100644 --- a/cuda/impl/psb_z_elg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_to_gpu + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_to_gpu #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_z_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_z_elg_to_gpu +end subroutine psb_z_cuda_elg_to_gpu diff --git a/cuda/impl/psb_c_elg_trim.f90 b/cuda/impl/psb_z_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_c_elg_trim.f90 rename to cuda/impl/psb_z_cuda_elg_trim.f90 index bc0c0696..3d261150 100644 --- a/cuda/impl/psb_c_elg_trim.f90 +++ b/cuda/impl/psb_z_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_elg_trim(a) +subroutine psb_z_cuda_elg_trim(a) use psb_base_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_trim + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_trim implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_c_elg_trim(a) return -end subroutine psb_c_elg_trim +end subroutine psb_z_cuda_elg_trim diff --git a/cuda/impl/psb_z_elg_vect_mv.F90 b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_z_elg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_elg_vect_mv.F90 index 5cd72e44..4bd1b3ed 100644 --- a/cuda/impl/psb_z_elg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_vect_mv + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_vect_mv #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_elg_vect_mv' + character(len=20) :: name='z_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= zzero) then @@ -128,4 +128,4 @@ subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_vect_mv +end subroutine psb_z_cuda_elg_vect_mv diff --git a/cuda/impl/psb_z_hdiag_csmv.F90 b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_z_hdiag_csmv.F90 rename to cuda/impl/psb_z_cuda_hdiag_csmv.F90 index baf730a2..8be14704 100644 --- a/cuda/impl/psb_z_hdiag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_csmv + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_csmv #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif implicit none - class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='z_hdiag_csmv' + character(len=20) :: name='z_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hdiag_csmv +end subroutine psb_z_cuda_hdiag_csmv diff --git a/cuda/impl/psb_z_hdiag_mold.F90 b/cuda/impl/psb_z_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_z_hdiag_mold.F90 rename to cuda/impl/psb_z_cuda_hdiag_mold.F90 index b656ed0f..33fdd8eb 100644 --- a/cuda/impl/psb_z_hdiag_mold.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_hdiag_mold(a,b,info) +subroutine psb_z_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_mold + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_mold implicit none - class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_hdiag_mold(a,b,info) return -end subroutine psb_z_hdiag_mold +end subroutine psb_z_cuda_hdiag_mold diff --git a/cuda/impl/psb_d_hdiag_to_gpu.F90 b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_d_hdiag_to_gpu.F90 rename to cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 index fb013586..47126aca 100644 --- a/cuda/impl/psb_d_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hdiag_to_gpu(a,info) +subroutine psb_z_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_to_gpu + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_to_gpu #else - use psb_d_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_d_hdiag_to_gpu(a,info) #endif -end subroutine psb_d_hdiag_to_gpu +end subroutine psb_z_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_z_hdiag_vect_mv.F90 b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_z_hdiag_vect_mv.F90 rename to cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 index 3e1c859e..cf0b3457 100644 --- a/cuda/impl/psb_z_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_vect_mv + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_vect_mv #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hdiag_vect_mv' + character(len=20) :: name='z_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hdiag_vect_mv +end subroutine psb_z_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_z_hlg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_z_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 index e3c05ec1..228244f1 100644 --- a/cuda/impl/psb_z_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_allocate_mnnz + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_allocate_mnnz #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_hlg_allocate_mnnz +end subroutine psb_z_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_z_hlg_csmm.F90 b/cuda/impl/psb_z_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_z_hlg_csmm.F90 rename to cuda/impl/psb_z_cuda_hlg_csmm.F90 index 3432c177..325ab0d0 100644 --- a/cuda/impl/psb_z_hlg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_csmm + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmm #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hlg_csmm' + character(len=20) :: name='z_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_csmm +end subroutine psb_z_cuda_hlg_csmm diff --git a/cuda/impl/psb_z_hlg_csmv.F90 b/cuda/impl/psb_z_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_z_hlg_csmv.F90 rename to cuda/impl/psb_z_cuda_hlg_csmv.F90 index b9b79e0c..ac84190e 100644 --- a/cuda/impl/psb_z_hlg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_csmv + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmv #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='z_hlg_csmv' + character(len=20) :: name='z_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_csmv +end subroutine psb_z_cuda_hlg_csmv diff --git a/cuda/impl/psb_z_hlg_from_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_z_hlg_from_gpu.F90 rename to cuda/impl/psb_z_cuda_hlg_from_gpu.F90 index f582e506..4db6c3ce 100644 --- a/cuda/impl/psb_z_hlg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_from_gpu(a,info) +subroutine psb_z_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_from_gpu + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_from_gpu #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_z_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_z_hlg_from_gpu +end subroutine psb_z_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_z_hlg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_z_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 index 5a7b1031..f99a5a9e 100644 --- a/cuda/impl/psb_z_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_inner_vect_sv + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_inner_vect_sv #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_inner_vect_sv +end subroutine psb_z_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_z_hlg_mold.F90 b/cuda/impl/psb_z_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_z_hlg_mold.F90 rename to cuda/impl/psb_z_cuda_hlg_mold.F90 index f9ff0c7a..cc9ad510 100644 --- a/cuda/impl/psb_z_hlg_mold.F90 +++ b/cuda/impl/psb_z_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_hlg_mold(a,b,info) +subroutine psb_z_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_mold + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_mold implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_z_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_hlg_mold +end subroutine psb_z_cuda_hlg_mold diff --git a/cuda/impl/psb_z_hlg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_z_hlg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 index f3d50626..aaba9be5 100644 --- a/cuda/impl/psb_z_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_z_hlg_reallocate_nz(nz,a) +subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_reallocate_nz + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_reallocate_nz #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_hlg_reallocate_nz' + character(len=20) :: name='z_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_z_hlg_reallocate_nz(nz,a) return -end subroutine psb_z_hlg_reallocate_nz +end subroutine psb_z_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_z_hlg_scal.F90 b/cuda/impl/psb_z_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_scal.F90 rename to cuda/impl/psb_z_cuda_hlg_scal.F90 index 8aa85500..3ffda36a 100644 --- a/cuda/impl/psb_z_hlg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_scal(d,a,info,side) +subroutine psb_z_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scal + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scal #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_z_hlg_scal(d,a,info,side) return -end subroutine psb_z_hlg_scal +end subroutine psb_z_cuda_hlg_scal diff --git a/cuda/impl/psb_z_hlg_scals.F90 b/cuda/impl/psb_z_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_scals.F90 rename to cuda/impl/psb_z_cuda_hlg_scals.F90 index d5689c06..bae50c7c 100644 --- a/cuda/impl/psb_z_hlg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_scals(d,a,info) +subroutine psb_z_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scals + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scals #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_z_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_hlg_scals +end subroutine psb_z_cuda_hlg_scals diff --git a/cuda/impl/psb_z_hlg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_hlg_to_gpu.F90 index d63aee9c..93c9f043 100644 --- a/cuda/impl/psb_z_hlg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_to_gpu + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_to_gpu #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_z_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_z_hlg_to_gpu +end subroutine psb_z_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_z_hlg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_hlg_vect_mv.F90 index 9efefc0a..f377efec 100644 --- a/cuda/impl/psb_z_hlg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_vect_mv + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_vect_mv #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hlg_vect_mv' + character(len=20) :: name='z_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_vect_mv +end subroutine psb_z_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_z_hybg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_z_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 index 2c38c536..0c6f9aa9 100644 --- a/cuda/impl/psb_z_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_allocate_mnnz + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_allocate_mnnz #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_hybg_allocate_mnnz +end subroutine psb_z_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_z_hybg_csmm.F90 b/cuda/impl/psb_z_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_z_hybg_csmm.F90 rename to cuda/impl/psb_z_cuda_hybg_csmm.F90 index 5ec9701b..d4a32420 100644 --- a/cuda/impl/psb_z_hybg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_csmm + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmm #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_csmm' + character(len=20) :: name='z_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_csmm +end subroutine psb_z_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_z_hybg_csmv.F90 b/cuda/impl/psb_z_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_z_hybg_csmv.F90 rename to cuda/impl/psb_z_cuda_hybg_csmv.F90 index e7f39cb6..180a8ae1 100644 --- a/cuda/impl/psb_z_hybg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_csmv + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmv #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_csmv' + character(len=20) :: name='z_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_csmv +end subroutine psb_z_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_z_hybg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_z_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 index 35c55ec7..1df47788 100644 --- a/cuda/impl/psb_z_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_inner_vect_sv + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_inner_vect_sv #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_inner_vect_sv' + character(len=20) :: name='z_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= zzero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_inner_vect_sv +end subroutine psb_z_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_z_hybg_mold.F90 b/cuda/impl/psb_z_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_z_hybg_mold.F90 rename to cuda/impl/psb_z_cuda_hybg_mold.F90 index 3a17dbd2..5a13ff19 100644 --- a/cuda/impl/psb_z_hybg_mold.F90 +++ b/cuda/impl/psb_z_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_mold(a,b,info) +subroutine psb_z_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_mold + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_mold implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_z_hybg_mold(a,b,info) return -end subroutine psb_z_hybg_mold +end subroutine psb_z_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_z_hybg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_z_hybg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 index 79d81911..5278ba35 100644 --- a/cuda/impl/psb_z_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_reallocate_nz(nz,a) +subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_reallocate_nz + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_reallocate_nz #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_hybg_reallocate_nz' + character(len=20) :: name='z_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_z_hybg_reallocate_nz(nz,a) return -end subroutine psb_z_hybg_reallocate_nz +end subroutine psb_z_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_z_hybg_scal.F90 b/cuda/impl/psb_z_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_z_hybg_scal.F90 rename to cuda/impl/psb_z_cuda_hybg_scal.F90 index c8179bf2..cd436e76 100644 --- a/cuda/impl/psb_z_hybg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_scal(d,a,info,side) +subroutine psb_z_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scal + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scal #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_z_hybg_scal(d,a,info,side) return -end subroutine psb_z_hybg_scal +end subroutine psb_z_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_z_hybg_scals.F90 b/cuda/impl/psb_z_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_z_hybg_scals.F90 rename to cuda/impl/psb_z_cuda_hybg_scals.F90 index 3729412d..0a9ee79d 100644 --- a/cuda/impl/psb_z_hybg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_scals(d,a,info) +subroutine psb_z_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scals + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scals #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_z_hybg_scals(d,a,info) return -end subroutine psb_z_hybg_scals +end subroutine psb_z_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_z_hybg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_z_hybg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_hybg_to_gpu.F90 index 4a2a9b1c..107b5049 100644 --- a/cuda/impl/psb_z_hybg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_to_gpu + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_to_gpu #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_z_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_z_hybg_to_gpu +end subroutine psb_z_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_z_hybg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_z_hybg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_hybg_vect_mv.F90 index f3b6695e..22751f2d 100644 --- a/cuda/impl/psb_z_hybg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_vect_mv + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_vect_mv #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_vect_mv' + character(len=20) :: name='z_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= zzero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_vect_mv +end subroutine psb_z_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_z_mv_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_csrg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 index 21771b89..d5390ee3 100644 --- a/cuda/impl/psb_z_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_mv_csrg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_coo + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_coo #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_z_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_csrg_from_coo +end subroutine psb_z_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_z_mv_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 index 31408214..e2bfdb73 100644 --- a/cuda/impl/psb_z_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_mv_csrg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_fmt + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_fmt #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_z_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_z_mv_csrg_from_fmt +end subroutine psb_z_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_z_mv_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_diag_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 index 8872c890..b61813bf 100644 --- a/cuda/impl/psb_z_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_mv_diag_from_coo(a,b,info) +subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_mv_diag_from_coo + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_mv_diag_from_coo #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif implicit none - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_z_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_diag_from_coo +end subroutine psb_z_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_z_mv_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_elg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 index 2d78edc6..e3ff4036 100644 --- a/cuda/impl/psb_z_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_mv_elg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_coo + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_coo #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_z_mv_elg_from_coo(a,b,info) return -end subroutine psb_z_mv_elg_from_coo +end subroutine psb_z_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_z_mv_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_z_mv_elg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 index 3bf663b3..07a80173 100644 --- a/cuda/impl/psb_z_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_mv_elg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_fmt + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_fmt #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_z_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_z_mv_elg_from_fmt +end subroutine psb_z_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_z_mv_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_z_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 index e1df9cc4..f25e6370 100644 --- a/cuda/impl/psb_z_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_z_mv_hdiag_from_coo(a,b,info) +subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif implicit none - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_z_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_z_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_z_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_hdiag_from_coo +end subroutine psb_z_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_z_mv_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_z_mv_hlg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 index ce037be2..3bc630de 100644 --- a/cuda/impl/psb_z_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_mv_hlg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_coo #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_z_mv_hlg_from_coo(a,b,info) return -end subroutine psb_z_mv_hlg_from_coo +end subroutine psb_z_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_z_mv_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 index 4ea1b385..d746a341 100644 --- a/cuda/impl/psb_z_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_mv_hlg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_fmt + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_fmt #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_z_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_z_mv_hlg_from_fmt +end subroutine psb_z_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_z_mv_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_hybg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 index 3424caea..7d0d9eec 100644 --- a/cuda/impl/psb_z_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_mv_hybg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_coo + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_coo #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_z_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_hybg_from_coo +end subroutine psb_z_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_z_mv_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 index 90c35897..7bfc27e3 100644 --- a/cuda/impl/psb_z_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_mv_hybg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_fmt + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_fmt #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_z_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_z_mv_hybg_from_fmt +end subroutine psb_z_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/psb_c_csrg_mat_mod.F90 b/cuda/psb_c_csrg_mat_mod.F90 deleted file mode 100644 index 203a6dbf..00000000 --- a/cuda/psb_c_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_c_csrg_mat_mod - - use iso_c_binding - use psb_c_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_c_csr_sparse_mat) :: psb_c_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(c_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => c_csrg_get_fmt - procedure, pass(a) :: sizeof => c_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_c_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_c_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_csrg_csmm - procedure, pass(a) :: csmv => psb_c_csrg_csmv - procedure, pass(a) :: scals => psb_c_csrg_scals - procedure, pass(a) :: scalv => psb_c_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_c_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_csrg_from_fmt - procedure, pass(a) :: free => c_csrg_free - procedure, pass(a) :: mold => psb_c_csrg_mold - procedure, pass(a) :: is_host => c_csrg_is_host - procedure, pass(a) :: is_dev => c_csrg_is_dev - procedure, pass(a) :: is_sync => c_csrg_is_sync - procedure, pass(a) :: set_host => c_csrg_set_host - procedure, pass(a) :: set_dev => c_csrg_set_dev - procedure, pass(a) :: set_sync => c_csrg_set_sync - procedure, pass(a) :: sync => c_csrg_sync - procedure, pass(a) :: to_gpu => psb_c_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_c_csrg_from_gpu - final :: c_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_csrg_mold -#endif - end type psb_c_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: c_csrg_get_nzeros, c_csrg_free, c_csrg_get_fmt, & - & c_csrg_get_size, c_csrg_sizeof, c_csrg_get_nz_row - - - interface - subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_vect_mv - end interface - - interface - subroutine psb_c_csrg_reallocate_nz(nz,a) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_c_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_c_csrg_reallocate_nz - end interface - - interface - subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_c_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_csrg_allocate_mnnz - end interface - - interface - subroutine psb_c_csrg_mold(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_mold - end interface - - interface - subroutine psb_c_csrg_to_gpu(a,info, nzrm) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_csrg_to_gpu - end interface - - interface - subroutine psb_c_csrg_from_gpu(a,info) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_from_gpu - end interface - - interface - subroutine psb_c_cp_csrg_from_coo(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_csrg_from_coo - end interface - - interface - subroutine psb_c_cp_csrg_from_fmt(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_csrg_from_fmt - end interface - - interface - subroutine psb_c_mv_csrg_from_coo(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_csrg_from_coo - end interface - - interface - subroutine psb_c_mv_csrg_from_fmt(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_csrg_from_fmt - end interface - - interface - subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:) - complex(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_csmv - end interface - interface - subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_csmm - end interface - - interface - subroutine psb_c_csrg_scal(d,a,info,side) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - complex(psb_spk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_c_csrg_scal - end interface - - interface - subroutine psb_c_csrg_scals(d,a,info) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - complex(psb_spk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function c_csrg_sizeof(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + (2*psb_sizeof_sp) * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function c_csrg_sizeof - - function c_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function c_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine c_csrg_set_host(a) - implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine c_csrg_set_host - - subroutine c_csrg_set_dev(a) - implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine c_csrg_set_dev - - subroutine c_csrg_set_sync(a) - implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine c_csrg_set_sync - - function c_csrg_is_dev(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function c_csrg_is_dev - - function c_csrg_is_host(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function c_csrg_is_host - - function c_csrg_is_sync(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function c_csrg_is_sync - - - subroutine c_csrg_sync(a) - implicit none - class(psb_c_csrg_sparse_mat), target, intent(in) :: a - class(psb_c_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine c_csrg_sync - - subroutine c_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_c_csr_sparse_mat%free() - - return - - end subroutine c_csrg_free - - subroutine c_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_c_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine c_csrg_finalize - -#else - interface - subroutine psb_c_csrg_mold(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_mold - end interface - -#endif - -end module psb_c_csrg_mat_mod diff --git a/cuda/psb_c_cuda_csrg_mat_mod.F90 b/cuda/psb_c_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..a98d7e99 --- /dev/null +++ b/cuda/psb_c_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_csrg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(c_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_c_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_c_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => c_cuda_csrg_free + procedure, pass(a) :: mold => psb_c_cuda_csrg_mold + procedure, pass(a) :: is_host => c_cuda_csrg_is_host + procedure, pass(a) :: is_dev => c_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => c_cuda_csrg_is_sync + procedure, pass(a) :: set_host => c_cuda_csrg_set_host + procedure, pass(a) :: set_dev => c_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => c_cuda_csrg_set_sync + procedure, pass(a) :: sync => c_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_c_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_c_cuda_csrg_from_gpu + final :: c_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_cuda_csrg_mold +#endif + end type psb_c_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_cuda_csrg_get_nzeros, c_cuda_csrg_free, c_cuda_csrg_get_fmt, & + & c_cuda_csrg_get_size, c_cuda_csrg_sizeof, c_cuda_csrg_get_nz_row + + + interface + subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_csrg_mold(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_mold + end interface + + interface + subroutine psb_c_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_c_cuda_csrg_from_gpu(a,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_csmv + end interface + interface + subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_csmm + end interface + + interface + subroutine psb_c_cuda_csrg_scal(d,a,info,side) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_csrg_scal + end interface + + interface + subroutine psb_c_cuda_csrg_scals(d,a,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_csrg_sizeof + + function c_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function c_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine c_cuda_csrg_set_host(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_cuda_csrg_set_host + + subroutine c_cuda_csrg_set_dev(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_cuda_csrg_set_dev + + subroutine c_cuda_csrg_set_sync(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_cuda_csrg_set_sync + + function c_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_cuda_csrg_is_dev + + function c_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_cuda_csrg_is_host + + function c_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_cuda_csrg_is_sync + + + subroutine c_cuda_csrg_sync(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_cuda_csrg_sync + + subroutine c_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_cuda_csrg_free + + subroutine c_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine c_cuda_csrg_finalize + +#else + interface + subroutine psb_c_cuda_csrg_mold(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_mold + end interface + +#endif + +end module psb_c_cuda_csrg_mat_mod diff --git a/cuda/psb_c_diag_mat_mod.F90 b/cuda/psb_c_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_c_diag_mat_mod.F90 rename to cuda/psb_c_cuda_diag_mat_mod.F90 index a7ab2fbb..1d5db05b 100644 --- a/cuda/psb_c_diag_mat_mod.F90 +++ b/cuda/psb_c_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_c_diag_mat_mod +module psb_c_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_c_dia_mat_mod - type, extends(psb_c_dia_sparse_mat) :: psb_c_diag_sparse_mat + type, extends(psb_c_dia_sparse_mat) :: psb_c_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_c_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => c_diag_get_fmt - procedure, pass(a) :: sizeof => c_diag_sizeof - procedure, pass(a) :: vect_mv => psb_c_diag_vect_mv -! procedure, pass(a) :: csmm => psb_c_diag_csmm - procedure, pass(a) :: csmv => psb_c_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_c_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_c_diag_scals -! procedure, pass(a) :: scalv => psb_c_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_c_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_c_diag_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => c_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_c_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_c_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_c_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_c_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_c_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_c_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_c_mv_diag_from_fmt - procedure, pass(a) :: free => c_diag_free - procedure, pass(a) :: mold => psb_c_diag_mold - procedure, pass(a) :: to_gpu => psb_c_diag_to_gpu - final :: c_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => c_cuda_diag_free + procedure, pass(a) :: mold => psb_c_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_diag_to_gpu + final :: c_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_c_diag_mold + procedure, pass(a) :: mold => psb_c_cuda_diag_mold #endif - end type psb_c_diag_sparse_mat + end type psb_c_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: c_diag_get_nzeros, c_diag_free, c_diag_get_fmt, & - & c_diag_get_size, c_diag_sizeof, c_diag_get_nz_row + private :: c_cuda_diag_get_nzeros, c_cuda_diag_free, c_cuda_diag_get_fmt, & + & c_cuda_diag_get_size, c_cuda_diag_sizeof, c_cuda_diag_get_nz_row interface - subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_vect_mv + end subroutine psb_c_cuda_diag_vect_mv end interface interface - subroutine psb_c_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_inner_vect_sv + end subroutine psb_c_cuda_diag_inner_vect_sv end interface interface - subroutine psb_c_diag_reallocate_nz(nz,a) - import :: psb_c_diag_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_diag_reallocate_nz(nz,a) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_diag_sparse_mat), intent(inout) :: a - end subroutine psb_c_diag_reallocate_nz + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_diag_reallocate_nz end interface interface - subroutine psb_c_diag_allocate_mnnz(m,n,a,nz) - import :: psb_c_diag_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_diag_allocate_mnnz + end subroutine psb_c_cuda_diag_allocate_mnnz end interface interface - subroutine psb_c_diag_mold(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_mold(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_diag_mold + end subroutine psb_c_cuda_diag_mold end interface interface - subroutine psb_c_diag_to_gpu(a,info, nzrm) - import :: psb_c_diag_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_diag_to_gpu + end subroutine psb_c_cuda_diag_to_gpu end interface interface - subroutine psb_c_cp_diag_from_coo(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_diag_from_coo + end subroutine psb_c_cuda_cp_diag_from_coo end interface interface - subroutine psb_c_cp_diag_from_fmt(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_diag_from_fmt + end subroutine psb_c_cuda_cp_diag_from_fmt end interface interface - subroutine psb_c_mv_diag_from_coo(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_diag_from_coo + end subroutine psb_c_cuda_mv_diag_from_coo end interface interface - subroutine psb_c_mv_diag_from_fmt(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_diag_from_fmt + end subroutine psb_c_cuda_mv_diag_from_fmt end interface interface - subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_csmv + end subroutine psb_c_cuda_diag_csmv end interface interface - subroutine psb_c_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_csmm + end subroutine psb_c_cuda_diag_csmm end interface interface - subroutine psb_c_diag_scal(d,a,info, side) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_diag_scal(d,a,info, side) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_diag_scal + end subroutine psb_c_cuda_diag_scal end interface interface - subroutine psb_c_diag_scals(d,a,info) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_diag_scals(d,a,info) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_diag_scals + end subroutine psb_c_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function c_diag_sizeof(a) result(res) + function c_cuda_diag_sizeof(a) result(res) implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_diag_sizeof + end function c_cuda_diag_sizeof - function c_diag_get_fmt() result(res) + function c_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function c_diag_get_fmt + end function c_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine c_diag_free(a) + subroutine c_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine c_diag_free + end subroutine c_cuda_diag_free - subroutine c_diag_finalize(a) + subroutine c_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_c_diag_sparse_mat), intent(inout) :: a + type(psb_c_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_diag_finalize + end subroutine c_cuda_diag_finalize #else interface - subroutine psb_c_diag_mold(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_mold(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_diag_mold + end subroutine psb_c_cuda_diag_mold end interface #endif -end module psb_c_diag_mat_mod +end module psb_c_cuda_diag_mat_mod diff --git a/cuda/psb_c_dnsg_mat_mod.F90 b/cuda/psb_c_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_c_dnsg_mat_mod.F90 rename to cuda/psb_c_cuda_dnsg_mat_mod.F90 index 7fe5fdda..e89e117b 100644 --- a/cuda/psb_c_dnsg_mat_mod.F90 +++ b/cuda/psb_c_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_c_dnsg_mat_mod +module psb_c_cuda_dnsg_mat_mod use iso_c_binding use psb_c_mat_mod use psb_c_dns_mat_mod use dnsdev_mod - type, extends(psb_c_dns_sparse_mat) :: psb_c_dnsg_sparse_mat + type, extends(psb_c_dns_sparse_mat) :: psb_c_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_c_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => c_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => c_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_c_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_c_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_c_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_c_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_c_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_c_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_c_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_c_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_c_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_c_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_c_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_c_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_c_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_c_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_c_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_c_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_dnsg_from_fmt - procedure, pass(a) :: free => c_dnsg_free - procedure, pass(a) :: mold => psb_c_dnsg_mold - procedure, pass(a) :: to_gpu => psb_c_dnsg_to_gpu - final :: c_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => c_cuda_dnsg_free + procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_dnsg_to_gpu + final :: c_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_c_dnsg_mold + procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold #endif - end type psb_c_dnsg_sparse_mat + end type psb_c_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: c_dnsg_get_nzeros, c_dnsg_free, c_dnsg_get_fmt, & - & c_dnsg_get_size, c_dnsg_get_nz_row + private :: c_cuda_dnsg_get_nzeros, c_cuda_dnsg_free, c_cuda_dnsg_get_fmt, & + & c_cuda_dnsg_get_size, c_cuda_dnsg_get_nz_row interface - subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_dnsg_vect_mv + end subroutine psb_c_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_c_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_c_dnsg_mold(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_dnsg_mold(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_dnsg_mold + end subroutine psb_c_cuda_dnsg_mold end interface interface - subroutine psb_c_dnsg_to_gpu(a,info) - import :: psb_c_dnsg_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_dnsg_to_gpu(a,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_dnsg_to_gpu + end subroutine psb_c_cuda_dnsg_to_gpu end interface interface - subroutine psb_c_cp_dnsg_from_coo(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_dnsg_from_coo + end subroutine psb_c_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_c_cp_dnsg_from_fmt(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_dnsg_from_fmt + end subroutine psb_c_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_c_mv_dnsg_from_coo(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_dnsg_from_coo + end subroutine psb_c_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_c_mv_dnsg_from_fmt(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_dnsg_from_fmt + end subroutine psb_c_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_c_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) !!$ complex(psb_spk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_csmv +!!$ end subroutine psb_c_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_c_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_csmm +!!$ end subroutine psb_c_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_scal(d,a,info, side) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_c_dnsg_scal +!!$ end subroutine psb_c_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_scals(d,a,info) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_dnsg_scals +!!$ end subroutine psb_c_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function c_dnsg_get_fmt() result(res) + function c_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function c_dnsg_get_fmt + end function c_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine c_dnsg_free(a) + subroutine c_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine c_dnsg_free + end subroutine c_cuda_dnsg_free - subroutine c_dnsg_finalize(a) + subroutine c_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_c_dnsg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_dnsg_finalize + end subroutine c_cuda_dnsg_finalize #else interface - subroutine psb_c_dnsg_mold(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_dnsg_mold(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_dnsg_mold + end subroutine psb_c_cuda_dnsg_mold end interface #endif -end module psb_c_dnsg_mat_mod +end module psb_c_cuda_dnsg_mat_mod diff --git a/cuda/psb_c_elg_mat_mod.F90 b/cuda/psb_c_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_c_elg_mat_mod.F90 rename to cuda/psb_c_cuda_elg_mat_mod.F90 index 83355b9d..43250ce3 100644 --- a/cuda/psb_c_elg_mat_mod.F90 +++ b/cuda/psb_c_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_c_elg_mat_mod +module psb_c_cuda_elg_mat_mod use iso_c_binding use psb_c_mat_mod use psb_c_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_ell_sparse_mat) :: psb_c_elg_sparse_mat + type, extends(psb_c_ell_sparse_mat) :: psb_c_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_c_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => c_elg_get_fmt - procedure, pass(a) :: sizeof => c_elg_sizeof - procedure, pass(a) :: vect_mv => psb_c_elg_vect_mv - procedure, pass(a) :: csmm => psb_c_elg_csmm - procedure, pass(a) :: csmv => psb_c_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_c_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_c_elg_scals - procedure, pass(a) :: scalv => psb_c_elg_scal - procedure, pass(a) :: reallocate_nz => psb_c_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_elg_allocate_mnnz - procedure, pass(a) :: reinit => c_elg_reinit + procedure, nopass :: get_fmt => c_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_elg_scals + procedure, pass(a) :: scalv => psb_c_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => c_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_elg_from_fmt - procedure, pass(a) :: free => c_elg_free - procedure, pass(a) :: mold => psb_c_elg_mold - procedure, pass(a) :: csput_a => psb_c_elg_csput_a - procedure, pass(a) :: csput_v => psb_c_elg_csput_v - procedure, pass(a) :: is_host => c_elg_is_host - procedure, pass(a) :: is_dev => c_elg_is_dev - procedure, pass(a) :: is_sync => c_elg_is_sync - procedure, pass(a) :: set_host => c_elg_set_host - procedure, pass(a) :: set_dev => c_elg_set_dev - procedure, pass(a) :: set_sync => c_elg_set_sync - procedure, pass(a) :: sync => c_elg_sync - procedure, pass(a) :: from_gpu => psb_c_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_c_elg_to_gpu - procedure, pass(a) :: asb => psb_c_elg_asb - final :: c_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => c_cuda_elg_free + procedure, pass(a) :: mold => psb_c_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_c_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_c_cuda_elg_csput_v + procedure, pass(a) :: is_host => c_cuda_elg_is_host + procedure, pass(a) :: is_dev => c_cuda_elg_is_dev + procedure, pass(a) :: is_sync => c_cuda_elg_is_sync + procedure, pass(a) :: set_host => c_cuda_elg_set_host + procedure, pass(a) :: set_dev => c_cuda_elg_set_dev + procedure, pass(a) :: set_sync => c_cuda_elg_set_sync + procedure, pass(a) :: sync => c_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_c_cuda_elg_asb + final :: c_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_c_elg_mold - procedure, pass(a) :: asb => psb_c_elg_asb + procedure, pass(a) :: mold => psb_c_cuda_elg_mold + procedure, pass(a) :: asb => psb_c_cuda_elg_asb #endif - end type psb_c_elg_sparse_mat + end type psb_c_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: c_elg_get_nzeros, c_elg_free, c_elg_get_fmt, & - & c_elg_get_size, c_elg_sizeof, c_elg_get_nz_row, c_elg_sync + private :: c_cuda_elg_get_nzeros, c_cuda_elg_free, c_cuda_elg_get_fmt, & + & c_cuda_elg_get_size, c_cuda_elg_sizeof, c_cuda_elg_get_nz_row, c_cuda_elg_sync interface - subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_vect_mv + end subroutine psb_c_cuda_elg_vect_mv end interface interface - subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_inner_vect_sv + end subroutine psb_c_cuda_elg_inner_vect_sv end interface interface - subroutine psb_c_elg_reallocate_nz(nz,a) - import :: psb_c_elg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_elg_reallocate_nz(nz,a) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_elg_reallocate_nz + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_reallocate_nz end interface interface - subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) - import :: psb_c_elg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_elg_allocate_mnnz + end subroutine psb_c_cuda_elg_allocate_mnnz end interface interface - subroutine psb_c_elg_mold(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_mold(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_mold + end subroutine psb_c_cuda_elg_mold end interface interface - subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_csput_a + end subroutine psb_c_cuda_elg_csput_a end interface interface - subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_c_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& + subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& & psb_i_base_vect_type - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_csput_v + end subroutine psb_c_cuda_elg_csput_v end interface interface - subroutine psb_c_elg_from_gpu(a,info) - import :: psb_c_elg_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_from_gpu(a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_from_gpu + end subroutine psb_c_cuda_elg_from_gpu end interface interface - subroutine psb_c_elg_to_gpu(a,info, nzrm) - import :: psb_c_elg_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_elg_to_gpu + end subroutine psb_c_cuda_elg_to_gpu end interface interface - subroutine psb_c_cp_elg_from_coo(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_elg_from_coo + end subroutine psb_c_cuda_cp_elg_from_coo end interface interface - subroutine psb_c_cp_elg_from_fmt(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_elg_from_fmt + end subroutine psb_c_cuda_cp_elg_from_fmt end interface interface - subroutine psb_c_mv_elg_from_coo(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_elg_from_coo + end subroutine psb_c_cuda_mv_elg_from_coo end interface interface - subroutine psb_c_mv_elg_from_fmt(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_elg_from_fmt + end subroutine psb_c_cuda_mv_elg_from_fmt end interface interface - subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_csmv + end subroutine psb_c_cuda_elg_csmv end interface interface - subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_csmm + end subroutine psb_c_cuda_elg_csmm end interface interface - subroutine psb_c_elg_scal(d,a,info, side) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_scal(d,a,info, side) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_elg_scal + end subroutine psb_c_cuda_elg_scal end interface interface - subroutine psb_c_elg_scals(d,a,info) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_scals(d,a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_scals + end subroutine psb_c_cuda_elg_scals end interface interface - subroutine psb_c_elg_asb(a) - import :: psb_c_elg_sparse_mat - class(psb_c_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_elg_asb + subroutine psb_c_cuda_elg_asb(a) + import :: psb_c_cuda_elg_sparse_mat + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function c_elg_sizeof(a) result(res) + function c_cuda_elg_sizeof(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_elg_sizeof + end function c_cuda_elg_sizeof - function c_elg_get_fmt() result(res) + function c_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function c_elg_get_fmt + end function c_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine c_elg_reinit(a,clear) + subroutine c_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine c_elg_reinit + end subroutine c_cuda_elg_reinit - subroutine c_elg_free(a) + subroutine c_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine c_elg_free + end subroutine c_cuda_elg_free - subroutine c_elg_sync(a) + subroutine c_cuda_elg_sync(a) implicit none - class(psb_c_elg_sparse_mat), target, intent(in) :: a - class(psb_c_elg_sparse_mat), pointer :: tmpa + class(psb_c_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine c_elg_sync + end subroutine c_cuda_elg_sync - subroutine c_elg_set_host(a) + subroutine c_cuda_elg_set_host(a) implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine c_elg_set_host + end subroutine c_cuda_elg_set_host - subroutine c_elg_set_dev(a) + subroutine c_cuda_elg_set_dev(a) implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine c_elg_set_dev + end subroutine c_cuda_elg_set_dev - subroutine c_elg_set_sync(a) + subroutine c_cuda_elg_set_sync(a) implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine c_elg_set_sync + end subroutine c_cuda_elg_set_sync - function c_elg_is_dev(a) result(res) + function c_cuda_elg_is_dev(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function c_elg_is_dev + end function c_cuda_elg_is_dev - function c_elg_is_host(a) result(res) + function c_cuda_elg_is_host(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function c_elg_is_host + end function c_cuda_elg_is_host - function c_elg_is_sync(a) result(res) + function c_cuda_elg_is_sync(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function c_elg_is_sync + end function c_cuda_elg_is_sync - subroutine c_elg_finalize(a) + subroutine c_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_c_elg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_elg_finalize + end subroutine c_cuda_elg_finalize #else interface - subroutine psb_c_elg_asb(a) - import :: psb_c_elg_sparse_mat - class(psb_c_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_elg_asb + subroutine psb_c_cuda_elg_asb(a) + import :: psb_c_cuda_elg_sparse_mat + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_asb end interface interface - subroutine psb_c_elg_mold(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_mold(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_mold + end subroutine psb_c_cuda_elg_mold end interface #endif -end module psb_c_elg_mat_mod +end module psb_c_cuda_elg_mat_mod diff --git a/cuda/psb_c_hdiag_mat_mod.F90 b/cuda/psb_c_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_c_hdiag_mat_mod.F90 rename to cuda/psb_c_cuda_hdiag_mat_mod.F90 index 8206abed..54f47684 100644 --- a/cuda/psb_c_hdiag_mat_mod.F90 +++ b/cuda/psb_c_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_c_hdiag_mat_mod +module psb_c_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_c_hdia_mat_mod - type, extends(psb_c_hdia_sparse_mat) :: psb_c_hdiag_sparse_mat + type, extends(psb_c_hdia_sparse_mat) :: psb_c_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => c_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => c_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_c_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_c_hdiag_csmm - procedure, pass(a) :: csmv => psb_c_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_c_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_c_hdiag_scals - ! procedure, pass(a) :: scalv => psb_c_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_c_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_c_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_c_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_c_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_c_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_c_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_c_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_c_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_hdiag_from_fmt - procedure, pass(a) :: free => c_hdiag_free - procedure, pass(a) :: mold => psb_c_hdiag_mold - procedure, pass(a) :: to_gpu => psb_c_hdiag_to_gpu - final :: c_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => c_cuda_hdiag_free + procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hdiag_to_gpu + final :: c_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_c_hdiag_mold + procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold #endif - end type psb_c_hdiag_sparse_mat + end type psb_c_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: c_hdiag_get_nzeros, c_hdiag_free, c_hdiag_get_fmt, & - & c_hdiag_get_size, c_hdiag_sizeof, c_hdiag_get_nz_row + private :: c_cuda_hdiag_get_nzeros, c_cuda_hdiag_free, c_cuda_hdiag_get_fmt, & + & c_cuda_hdiag_get_size, c_cuda_hdiag_sizeof, c_cuda_hdiag_get_nz_row interface - subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hdiag_vect_mv + end subroutine psb_c_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_c_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type -!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_c_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_hdiag_inner_vect_sv +!!$ end subroutine psb_c_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_c_hdiag_reallocate_nz +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_c_hdiag_allocate_mnnz +!!$ end subroutine psb_c_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_c_hdiag_mold(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_mold(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hdiag_mold + end subroutine psb_c_cuda_hdiag_mold end interface interface - subroutine psb_c_hdiag_to_gpu(a,info) - import :: psb_c_hdiag_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hdiag_to_gpu(a,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hdiag_to_gpu + end subroutine psb_c_cuda_hdiag_to_gpu end interface interface - subroutine psb_c_cp_hdiag_from_coo(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hdiag_from_coo + end subroutine psb_c_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_c_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_c_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_cp_hdiag_from_fmt +!!$ end subroutine psb_c_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_c_mv_hdiag_from_coo(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hdiag_from_coo + end subroutine psb_c_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_c_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_c_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_mv_hdiag_from_fmt +!!$ end subroutine psb_c_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hdiag_csmv + end subroutine psb_c_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_c_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_hdiag_csmm +!!$ end subroutine psb_c_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_scal(d,a,info, side) -!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_c_hdiag_scal +!!$ end subroutine psb_c_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_scals(d,a,info) -!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_hdiag_scals +!!$ end subroutine psb_c_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function c_hdiag_get_fmt() result(res) + function c_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function c_hdiag_get_fmt + end function c_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine c_hdiag_free(a) + subroutine c_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine c_hdiag_free + end subroutine c_cuda_hdiag_free - subroutine c_hdiag_finalize(a) + subroutine c_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_c_hdiag_sparse_mat), intent(inout) :: a + type(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_c_hdia_sparse_mat%free() return - end subroutine c_hdiag_finalize + end subroutine c_cuda_hdiag_finalize #else interface - subroutine psb_c_hdiag_mold(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_mold(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hdiag_mold + end subroutine psb_c_cuda_hdiag_mold end interface #endif -end module psb_c_hdiag_mat_mod +end module psb_c_cuda_hdiag_mat_mod diff --git a/cuda/psb_c_hlg_mat_mod.F90 b/cuda/psb_c_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_c_hlg_mat_mod.F90 rename to cuda/psb_c_cuda_hlg_mat_mod.F90 index 9236a202..74284f30 100644 --- a/cuda/psb_c_hlg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_c_hlg_mat_mod +module psb_c_cuda_hlg_mat_mod use iso_c_binding use psb_c_mat_mod @@ -41,7 +41,7 @@ module psb_c_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_hll_sparse_mat) :: psb_c_hlg_sparse_mat + type, extends(psb_c_hll_sparse_mat) :: psb_c_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_c_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => c_hlg_get_fmt - procedure, pass(a) :: sizeof => c_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_c_hlg_vect_mv - procedure, pass(a) :: csmm => psb_c_hlg_csmm - procedure, pass(a) :: csmv => psb_c_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_c_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_c_hlg_scals - procedure, pass(a) :: scalv => psb_c_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_c_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_hlg_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_hlg_from_fmt - procedure, pass(a) :: free => c_hlg_free - procedure, pass(a) :: mold => psb_c_hlg_mold - procedure, pass(a) :: is_host => c_hlg_is_host - procedure, pass(a) :: is_dev => c_hlg_is_dev - procedure, pass(a) :: is_sync => c_hlg_is_sync - procedure, pass(a) :: set_host => c_hlg_set_host - procedure, pass(a) :: set_dev => c_hlg_set_dev - procedure, pass(a) :: set_sync => c_hlg_set_sync - procedure, pass(a) :: sync => c_hlg_sync - procedure, pass(a) :: from_gpu => psb_c_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_c_hlg_to_gpu - final :: c_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => c_cuda_hlg_free + procedure, pass(a) :: mold => psb_c_cuda_hlg_mold + procedure, pass(a) :: is_host => c_cuda_hlg_is_host + procedure, pass(a) :: is_dev => c_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => c_cuda_hlg_is_sync + procedure, pass(a) :: set_host => c_cuda_hlg_set_host + procedure, pass(a) :: set_dev => c_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => c_cuda_hlg_set_sync + procedure, pass(a) :: sync => c_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_hlg_to_gpu + final :: c_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_c_hlg_mold + procedure, pass(a) :: mold => psb_c_cuda_hlg_mold #endif - end type psb_c_hlg_sparse_mat + end type psb_c_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: c_hlg_get_nzeros, c_hlg_free, c_hlg_get_fmt, & - & c_hlg_get_size, c_hlg_sizeof, c_hlg_get_nz_row + private :: c_cuda_hlg_get_nzeros, c_cuda_hlg_free, c_cuda_hlg_get_fmt, & + & c_cuda_hlg_get_size, c_cuda_hlg_sizeof, c_cuda_hlg_get_nz_row interface - subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_vect_mv + end subroutine psb_c_cuda_hlg_vect_mv end interface interface - subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_inner_vect_sv + end subroutine psb_c_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_c_hlg_reallocate_nz(nz,a) - import :: psb_c_hlg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_c_hlg_reallocate_nz + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hlg_reallocate_nz end interface interface - subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_c_hlg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_hlg_allocate_mnnz + end subroutine psb_c_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_c_hlg_mold(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_mold(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_mold + end subroutine psb_c_cuda_hlg_mold end interface interface - subroutine psb_c_hlg_from_gpu(a,info) - import :: psb_c_hlg_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_from_gpu(a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_from_gpu + end subroutine psb_c_cuda_hlg_from_gpu end interface interface - subroutine psb_c_hlg_to_gpu(a,info, nzrm) - import :: psb_c_hlg_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_hlg_to_gpu + end subroutine psb_c_cuda_hlg_to_gpu end interface interface - subroutine psb_c_cp_hlg_from_coo(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hlg_from_coo + end subroutine psb_c_cuda_cp_hlg_from_coo end interface interface - subroutine psb_c_cp_hlg_from_fmt(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hlg_from_fmt + end subroutine psb_c_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_c_mv_hlg_from_coo(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hlg_from_coo + end subroutine psb_c_cuda_mv_hlg_from_coo end interface interface - subroutine psb_c_mv_hlg_from_fmt(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hlg_from_fmt + end subroutine psb_c_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_csmv + end subroutine psb_c_cuda_hlg_csmv end interface interface - subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_csmm + end subroutine psb_c_cuda_hlg_csmm end interface interface - subroutine psb_c_hlg_scal(d,a,info, side) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_scal(d,a,info, side) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_hlg_scal + end subroutine psb_c_cuda_hlg_scal end interface interface - subroutine psb_c_hlg_scals(d,a,info) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_scals(d,a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_scals + end subroutine psb_c_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function c_hlg_sizeof(a) result(res) + function c_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_hlg_sizeof + end function c_cuda_hlg_sizeof - function c_hlg_get_fmt() result(res) + function c_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function c_hlg_get_fmt + end function c_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine c_hlg_free(a) + subroutine c_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine c_hlg_free + end subroutine c_cuda_hlg_free - subroutine c_hlg_sync(a) + subroutine c_cuda_hlg_sync(a) implicit none - class(psb_c_hlg_sparse_mat), target, intent(in) :: a - class(psb_c_hlg_sparse_mat), pointer :: tmpa + class(psb_c_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine c_hlg_sync + end subroutine c_cuda_hlg_sync - subroutine c_hlg_set_host(a) + subroutine c_cuda_hlg_set_host(a) implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine c_hlg_set_host + end subroutine c_cuda_hlg_set_host - subroutine c_hlg_set_dev(a) + subroutine c_cuda_hlg_set_dev(a) implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine c_hlg_set_dev + end subroutine c_cuda_hlg_set_dev - subroutine c_hlg_set_sync(a) + subroutine c_cuda_hlg_set_sync(a) implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine c_hlg_set_sync + end subroutine c_cuda_hlg_set_sync - function c_hlg_is_dev(a) result(res) + function c_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function c_hlg_is_dev + end function c_cuda_hlg_is_dev - function c_hlg_is_host(a) result(res) + function c_cuda_hlg_is_host(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function c_hlg_is_host + end function c_cuda_hlg_is_host - function c_hlg_is_sync(a) result(res) + function c_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function c_hlg_is_sync + end function c_cuda_hlg_is_sync - subroutine c_hlg_finalize(a) + subroutine c_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_c_hlg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_hlg_finalize + end subroutine c_cuda_hlg_finalize #else interface - subroutine psb_c_hlg_mold(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_mold(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_mold + end subroutine psb_c_cuda_hlg_mold end interface #endif -end module psb_c_hlg_mat_mod +end module psb_c_cuda_hlg_mat_mod diff --git a/cuda/psb_c_hybg_mat_mod.F90 b/cuda/psb_c_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_c_hybg_mat_mod.F90 rename to cuda/psb_c_cuda_hybg_mat_mod.F90 index d5c605ec..d16988ba 100644 --- a/cuda/psb_c_hybg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_c_hybg_mat_mod +module psb_c_cuda_hybg_mat_mod use iso_c_binding use psb_c_mat_mod use cusparse_mod - type, extends(psb_c_csr_sparse_mat) :: psb_c_hybg_sparse_mat + type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_c_hybg_mat_mod type(c_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => c_hybg_get_fmt - procedure, pass(a) :: sizeof => c_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_c_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_c_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_hybg_csmm - procedure, pass(a) :: csmv => psb_c_hybg_csmv - procedure, pass(a) :: scals => psb_c_hybg_scals - procedure, pass(a) :: scalv => psb_c_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_c_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_hybg_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_c_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_hybg_from_fmt - procedure, pass(a) :: free => c_hybg_free - procedure, pass(a) :: mold => psb_c_hybg_mold - procedure, pass(a) :: to_gpu => psb_c_hybg_to_gpu - final :: c_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => c_cuda_hybg_free + procedure, pass(a) :: mold => psb_c_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hybg_to_gpu + final :: c_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_c_hybg_mold + procedure, pass(a) :: mold => psb_c_cuda_hybg_mold #endif - end type psb_c_hybg_sparse_mat + end type psb_c_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: c_hybg_get_nzeros, c_hybg_free, c_hybg_get_fmt, & - & c_hybg_get_size, c_hybg_sizeof, c_hybg_get_nz_row + private :: c_cuda_hybg_get_nzeros, c_cuda_hybg_free, c_cuda_hybg_get_fmt, & + & c_cuda_hybg_get_size, c_cuda_hybg_sizeof, c_cuda_hybg_get_nz_row interface - subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_inner_vect_sv + end subroutine psb_c_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_vect_mv + end subroutine psb_c_cuda_hybg_vect_mv end interface interface - subroutine psb_c_hybg_reallocate_nz(nz,a) - import :: psb_c_hybg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_c_hybg_reallocate_nz + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hybg_reallocate_nz end interface interface - subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_c_hybg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_hybg_allocate_mnnz + end subroutine psb_c_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_c_hybg_mold(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_mold(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hybg_mold + end subroutine psb_c_cuda_hybg_mold end interface interface - subroutine psb_c_hybg_to_gpu(a,info, nzrm) - import :: psb_c_hybg_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_hybg_to_gpu + end subroutine psb_c_cuda_hybg_to_gpu end interface interface - subroutine psb_c_cp_hybg_from_coo(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hybg_from_coo + end subroutine psb_c_cuda_cp_hybg_from_coo end interface interface - subroutine psb_c_cp_hybg_from_fmt(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hybg_from_fmt + end subroutine psb_c_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_c_mv_hybg_from_coo(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hybg_from_coo + end subroutine psb_c_cuda_mv_hybg_from_coo end interface interface - subroutine psb_c_mv_hybg_from_fmt(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hybg_from_fmt + end subroutine psb_c_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_csmv + end subroutine psb_c_cuda_hybg_csmv end interface interface - subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_csmm + end subroutine psb_c_cuda_hybg_csmm end interface interface - subroutine psb_c_hybg_scal(d,a,info,side) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hybg_scal(d,a,info,side) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_hybg_scal + end subroutine psb_c_cuda_hybg_scal end interface interface - subroutine psb_c_hybg_scals(d,a,info) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hybg_scals(d,a,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hybg_scals + end subroutine psb_c_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function c_hybg_sizeof(a) result(res) + function c_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + (2*psb_sizeof_sp) * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_hybg_sizeof + end function c_cuda_hybg_sizeof - function c_hybg_get_fmt() result(res) + function c_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function c_hybg_get_fmt + end function c_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine c_hybg_free(a) + subroutine c_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_c_csr_sparse_mat%free() return - end subroutine c_hybg_free + end subroutine c_cuda_hybg_free - subroutine c_hybg_finalize(a) + subroutine c_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_c_hybg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine c_hybg_finalize + end subroutine c_cuda_hybg_finalize #else interface - subroutine psb_c_hybg_mold(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_mold(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hybg_mold + end subroutine psb_c_cuda_hybg_mold end interface #endif -end module psb_c_hybg_mat_mod +end module psb_c_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_c_gpu_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_c_gpu_vect_mod.F90 rename to cuda/psb_c_cuda_vect_mod.F90 index 4c31154f..be06167e 100644 --- a/cuda/psb_c_gpu_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_c_gpu_vect_mod +module psb_c_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_c_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_c_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_c_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_base_vect_type) :: psb_c_vect_gpu + type, extends(psb_c_base_vect_type) :: psb_c_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_c_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => c_gpu_get_nrows - procedure, nopass :: get_fmt => c_gpu_get_fmt - - procedure, pass(x) :: all => c_gpu_all - procedure, pass(x) :: zero => c_gpu_zero - procedure, pass(x) :: asb_m => c_gpu_asb_m - procedure, pass(x) :: sync => c_gpu_sync - procedure, pass(x) :: sync_space => c_gpu_sync_space - procedure, pass(x) :: bld_x => c_gpu_bld_x - procedure, pass(x) :: bld_mn => c_gpu_bld_mn - procedure, pass(x) :: free => c_gpu_free - procedure, pass(x) :: ins_a => c_gpu_ins_a - procedure, pass(x) :: ins_v => c_gpu_ins_v - procedure, pass(x) :: is_host => c_gpu_is_host - procedure, pass(x) :: is_dev => c_gpu_is_dev - procedure, pass(x) :: is_sync => c_gpu_is_sync - procedure, pass(x) :: set_host => c_gpu_set_host - procedure, pass(x) :: set_dev => c_gpu_set_dev - procedure, pass(x) :: set_sync => c_gpu_set_sync - procedure, pass(x) :: set_scal => c_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => c_gpu_set_vect - procedure, pass(x) :: gthzv_x => c_gpu_gthzv_x - procedure, pass(y) :: sctb => c_gpu_sctb - procedure, pass(y) :: sctb_x => c_gpu_sctb_x - procedure, pass(x) :: gthzbuf => c_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => c_gpu_sctb_buf - procedure, pass(x) :: new_buffer => c_gpu_new_buffer - procedure, nopass :: device_wait => c_gpu_device_wait - procedure, pass(x) :: free_buffer => c_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => c_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => c_gpu_dot_v - procedure, pass(x) :: dot_a => c_gpu_dot_a - procedure, pass(y) :: axpby_v => c_gpu_axpby_v - procedure, pass(y) :: axpby_a => c_gpu_axpby_a - procedure, pass(y) :: mlt_v => c_gpu_mlt_v - procedure, pass(y) :: mlt_a => c_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => c_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => c_gpu_mlt_v_2 - procedure, pass(x) :: scal => c_gpu_scal - procedure, pass(x) :: nrm2 => c_gpu_nrm2 - procedure, pass(x) :: amax => c_gpu_amax - procedure, pass(x) :: asum => c_gpu_asum - procedure, pass(x) :: absval1 => c_gpu_absval1 - procedure, pass(x) :: absval2 => c_gpu_absval2 - - final :: c_gpu_vect_finalize + procedure, pass(x) :: get_nrows => c_cuda_get_nrows + procedure, nopass :: get_fmt => c_cuda_get_fmt + + procedure, pass(x) :: all => c_cuda_all + procedure, pass(x) :: zero => c_cuda_zero + procedure, pass(x) :: asb_m => c_cuda_asb_m + procedure, pass(x) :: sync => c_cuda_sync + procedure, pass(x) :: sync_space => c_cuda_sync_space + procedure, pass(x) :: bld_x => c_cuda_bld_x + procedure, pass(x) :: bld_mn => c_cuda_bld_mn + procedure, pass(x) :: free => c_cuda_free + procedure, pass(x) :: ins_a => c_cuda_ins_a + procedure, pass(x) :: ins_v => c_cuda_ins_v + procedure, pass(x) :: is_host => c_cuda_is_host + procedure, pass(x) :: is_dev => c_cuda_is_dev + procedure, pass(x) :: is_sync => c_cuda_is_sync + procedure, pass(x) :: set_host => c_cuda_set_host + procedure, pass(x) :: set_dev => c_cuda_set_dev + procedure, pass(x) :: set_sync => c_cuda_set_sync + procedure, pass(x) :: set_scal => c_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => c_cuda_set_vect + procedure, pass(x) :: gthzv_x => c_cuda_gthzv_x + procedure, pass(y) :: sctb => c_cuda_sctb + procedure, pass(y) :: sctb_x => c_cuda_sctb_x + procedure, pass(x) :: gthzbuf => c_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => c_cuda_sctb_buf + procedure, pass(x) :: new_buffer => c_cuda_new_buffer + procedure, nopass :: device_wait => c_cuda_device_wait + procedure, pass(x) :: free_buffer => c_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => c_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => c_cuda_dot_v + procedure, pass(x) :: dot_a => c_cuda_dot_a + procedure, pass(y) :: axpby_v => c_cuda_axpby_v + procedure, pass(y) :: axpby_a => c_cuda_axpby_a + procedure, pass(y) :: mlt_v => c_cuda_mlt_v + procedure, pass(y) :: mlt_a => c_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => c_cuda_mlt_v_2 + procedure, pass(x) :: scal => c_cuda_scal + procedure, pass(x) :: nrm2 => c_cuda_nrm2 + procedure, pass(x) :: amax => c_cuda_amax + procedure, pass(x) :: asum => c_cuda_asum + procedure, pass(x) :: absval1 => c_cuda_absval1 + procedure, pass(x) :: absval2 => c_cuda_absval2 + + final :: c_cuda_vect_finalize #endif - end type psb_c_vect_gpu + end type psb_c_vect_cuda - public :: psb_c_vect_gpu_ + public :: psb_c_vect_cuda_ private :: constructor - interface psb_c_vect_gpu_ + interface psb_c_vect_cuda_ module procedure constructor - end interface psb_c_vect_gpu_ + end interface psb_c_vect_cuda_ contains function constructor(x) result(this) complex(psb_spk_) :: x(:) - type(psb_c_vect_gpu) :: this + type(psb_c_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine c_gpu_device_wait() + subroutine c_cuda_device_wait() call psb_cudaSync() - end subroutine c_gpu_device_wait + end subroutine c_cuda_device_wait - subroutine c_gpu_new_buffer(n,x,info) + subroutine c_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine c_gpu_gthzv_x + end subroutine c_cuda_gthzv_x - subroutine c_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine c_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_c_vect_gpu) :: x + class(psb_c_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine c_gpu_gthzbuf + end subroutine c_cuda_gthzbuf - subroutine c_gpu_sctb(n,idx,x,beta,y) + subroutine c_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: beta, x(:) - class(psb_c_vect_gpu) :: y + class(psb_c_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_c_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine c_gpu_sctb + end subroutine c_cuda_sctb - subroutine c_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine c_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta, x(:) - class(psb_c_vect_gpu) :: y + class(psb_c_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine c_gpu_sctb_x + end subroutine c_cuda_sctb_x - subroutine c_gpu_sctb_buf(i,n,idx,beta,y) + subroutine c_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta - class(psb_c_vect_gpu) :: y + class(psb_c_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine c_gpu_sctb_buf + end subroutine c_cuda_sctb_buf - subroutine c_gpu_bld_x(x,this) + subroutine c_cuda_bld_x(x,this) use psb_base_mod complex(psb_spk_), intent(in) :: this(:) - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_bld_x',& + call psb_errpush(info,'c_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine c_gpu_bld_x + end subroutine c_cuda_bld_x - subroutine c_gpu_bld_mn(x,n) + subroutine c_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'c_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'c_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine c_gpu_bld_mn + end subroutine c_cuda_bld_mn - subroutine c_gpu_set_host(x) + subroutine c_cuda_set_host(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine c_gpu_set_host + end subroutine c_cuda_set_host - subroutine c_gpu_set_dev(x) + subroutine c_cuda_set_dev(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine c_gpu_set_dev + end subroutine c_cuda_set_dev - subroutine c_gpu_set_sync(x) + subroutine c_cuda_set_sync(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine c_gpu_set_sync + end subroutine c_cuda_set_sync - function c_gpu_is_dev(x) result(res) + function c_cuda_is_dev(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function c_gpu_is_dev + end function c_cuda_is_dev - function c_gpu_is_host(x) result(res) + function c_cuda_is_host(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function c_gpu_is_host + end function c_cuda_is_host - function c_gpu_is_sync(x) result(res) + function c_cuda_is_sync(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function c_gpu_is_sync + end function c_cuda_is_sync - function c_gpu_get_nrows(x) result(res) + function c_cuda_get_nrows(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function c_gpu_get_nrows + end function c_cuda_get_nrows - function c_gpu_get_fmt() result(res) + function c_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'cGPU' - end function c_gpu_get_fmt + end function c_cuda_get_fmt - subroutine c_gpu_all(n, x, info) + subroutine c_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_c_vect_gpu), intent(out) :: x + class(psb_c_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_all',& + call psb_errpush(info,'c_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine c_gpu_all + end subroutine c_cuda_all - subroutine c_gpu_zero(x) + subroutine c_cuda_zero(x) use psi_serial_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=czero call x%set_host() - end subroutine c_gpu_zero + end subroutine c_cuda_zero - subroutine c_gpu_asb_m(n, x, info) + subroutine c_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine c_gpu_asb_m + end subroutine c_cuda_asb_m - subroutine c_gpu_sync_space(x,info) + subroutine c_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine c_gpu_sync_space + end subroutine c_cuda_sync_space - subroutine c_gpu_sync(x) + subroutine c_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'c_gpu_sync') + call psb_errpush(info,'c_cuda_sync') end if - end subroutine c_gpu_sync + end subroutine c_cuda_sync - subroutine c_gpu_free(x, info) + subroutine c_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine c_gpu_free + end subroutine c_cuda_free - subroutine c_gpu_set_scal(x,val,first,last) - class(psb_c_vect_gpu), intent(inout) :: x + subroutine c_cuda_set_scal(x,val,first,last) + class(psb_c_vect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine c_gpu_set_scal + end subroutine c_cuda_set_scal !!$ -!!$ subroutine c_gpu_set_vect(x,val) -!!$ class(psb_c_vect_gpu), intent(inout) :: x +!!$ subroutine c_cuda_set_vect(x,val) +!!$ class(psb_c_vect_cuda), intent(inout) :: x !!$ complex(psb_spk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_c_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine c_gpu_set_vect +!!$ end subroutine c_cuda_set_vect - function c_gpu_dot_v(n,x,y) result(res) + function c_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n complex(psb_spk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_c_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'c_gpu_dot_v') + call psb_errpush(info,'c_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function c_gpu_dot_v + end function c_cuda_dot_v - function c_gpu_dot_a(n,x,y) result(res) + function c_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n complex(psb_spk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function c_gpu_dot_a + end function c_cuda_dot_a - subroutine c_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine c_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y complex(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) ! Do something different here if ((beta /= czero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine c_gpu_axpby_v + end subroutine c_cuda_axpby_v - subroutine c_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine c_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m complex(psb_spk_), intent(in) :: x(:) - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y complex(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine c_gpu_axpby_a + end subroutine c_cuda_axpby_a - subroutine c_gpu_mlt_v(x, y, info) + subroutine c_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine c_gpu_mlt_v + end subroutine c_cuda_mlt_v - subroutine c_gpu_mlt_a(x, y, info) + subroutine c_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none complex(psb_spk_), intent(in) :: x(:) - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_c_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine c_gpu_mlt_a + end subroutine c_cuda_mlt_a - subroutine c_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine c_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none complex(psb_spk_), intent(in) :: alpha,beta complex(psb_spk_), intent(in) :: x(:) complex(psb_spk_), intent(in) :: y(:) - class(psb_c_vect_gpu), intent(inout) :: z + class(psb_c_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine c_gpu_mlt_a_2 + end subroutine c_cuda_mlt_a_2 - subroutine c_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine c_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none complex(psb_spk_), intent(in) :: alpha,beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y - class(psb_c_vect_gpu), intent(inout) :: z + class(psb_c_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type (yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= czero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine c_gpu_mlt_v_2 + end subroutine c_cuda_mlt_v_2 - subroutine c_gpu_scal(alpha, x) + subroutine c_cuda_scal(alpha, x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x complex(psb_spk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine c_gpu_scal + end subroutine c_cuda_scal - function c_gpu_nrm2(n,x) result(res) + function c_cuda_nrm2(n,x) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) - end function c_gpu_nrm2 + end function c_cuda_nrm2 - function c_gpu_amax(n,x) result(res) + function c_cuda_amax(n,x) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) - end function c_gpu_amax + end function c_cuda_amax - function c_gpu_asum(n,x) result(res) + function c_cuda_asum(n,x) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDeviceComplex(res,n,x%deviceVect) - end function c_gpu_asum + end function c_cuda_asum - subroutine c_gpu_absval1(x) + subroutine c_cuda_absval1(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,cone,x%deviceVect) - end subroutine c_gpu_absval1 + end subroutine c_cuda_absval1 - subroutine c_gpu_absval2(x,y) + subroutine c_cuda_absval2(x,y) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_c_vect_gpu) + class is (psb_c_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,cone,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_c_base_vect_type%absval(y) end select - end subroutine c_gpu_absval2 + end subroutine c_cuda_absval2 - subroutine c_gpu_vect_finalize(x) + subroutine c_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_c_vect_gpu), intent(inout) :: x + type(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine c_gpu_vect_finalize + end subroutine c_cuda_vect_finalize - subroutine c_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine c_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_c_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_c_vect_gpu) + class is (psb_c_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceFloatComplex(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine c_gpu_ins_v + end subroutine c_cuda_ins_v - subroutine c_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine c_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_spk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_c_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine c_gpu_ins_a + end subroutine c_cuda_ins_a #endif -end module psb_c_gpu_vect_mod +end module psb_c_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_c_gpu_vect_mod -module psb_c_gpu_multivect_mod +module psb_c_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_c_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_c_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_c_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_base_multivect_type) :: psb_c_multivect_gpu + type, extends(psb_c_base_multivect_type) :: psb_c_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_c_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => c_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => c_gpu_multi_get_ncols - procedure, nopass :: get_fmt => c_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => c_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => c_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => c_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => c_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => c_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => c_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => c_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => c_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => c_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => c_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => c_gpu_multi_amax -!!$ procedure, pass(x) :: asum => c_gpu_multi_asum - procedure, pass(x) :: all => c_gpu_multi_all - procedure, pass(x) :: zero => c_gpu_multi_zero - procedure, pass(x) :: asb => c_gpu_multi_asb - procedure, pass(x) :: sync => c_gpu_multi_sync - procedure, pass(x) :: sync_space => c_gpu_multi_sync_space - procedure, pass(x) :: bld_x => c_gpu_multi_bld_x - procedure, pass(x) :: bld_n => c_gpu_multi_bld_n - procedure, pass(x) :: free => c_gpu_multi_free - procedure, pass(x) :: ins => c_gpu_multi_ins - procedure, pass(x) :: is_host => c_gpu_multi_is_host - procedure, pass(x) :: is_dev => c_gpu_multi_is_dev - procedure, pass(x) :: is_sync => c_gpu_multi_is_sync - procedure, pass(x) :: set_host => c_gpu_multi_set_host - procedure, pass(x) :: set_dev => c_gpu_multi_set_dev - procedure, pass(x) :: set_sync => c_gpu_multi_set_sync - procedure, pass(x) :: set_scal => c_gpu_multi_set_scal - procedure, pass(x) :: set_vect => c_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => c_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => c_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => c_gpu_multi_sctb_x - final :: c_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => c_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => c_cuda_multi_get_ncols + procedure, nopass :: get_fmt => c_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => c_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => c_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => c_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => c_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => c_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => c_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => c_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => c_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => c_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => c_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => c_cuda_multi_amax +!!$ procedure, pass(x) :: asum => c_cuda_multi_asum + procedure, pass(x) :: all => c_cuda_multi_all + procedure, pass(x) :: zero => c_cuda_multi_zero + procedure, pass(x) :: asb => c_cuda_multi_asb + procedure, pass(x) :: sync => c_cuda_multi_sync + procedure, pass(x) :: sync_space => c_cuda_multi_sync_space + procedure, pass(x) :: bld_x => c_cuda_multi_bld_x + procedure, pass(x) :: bld_n => c_cuda_multi_bld_n + procedure, pass(x) :: free => c_cuda_multi_free + procedure, pass(x) :: ins => c_cuda_multi_ins + procedure, pass(x) :: is_host => c_cuda_multi_is_host + procedure, pass(x) :: is_dev => c_cuda_multi_is_dev + procedure, pass(x) :: is_sync => c_cuda_multi_is_sync + procedure, pass(x) :: set_host => c_cuda_multi_set_host + procedure, pass(x) :: set_dev => c_cuda_multi_set_dev + procedure, pass(x) :: set_sync => c_cuda_multi_set_sync + procedure, pass(x) :: set_scal => c_cuda_multi_set_scal + procedure, pass(x) :: set_vect => c_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => c_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => c_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => c_cuda_multi_sctb_x + final :: c_cuda_multi_vect_finalize #endif - end type psb_c_multivect_gpu + end type psb_c_multivect_cuda - public :: psb_c_multivect_gpu + public :: psb_c_multivect_cuda private :: constructor - interface psb_c_multivect_gpu + interface psb_c_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) complex(psb_spk_) :: x(:,:) - type(psb_c_multivect_gpu) :: this + type(psb_c_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine c_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_spk_) :: y(:) -!!$ class(psb_c_multivect_gpu) :: x +!!$ class(psb_c_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine c_gpu_multi_gthzv_x +!!$ end subroutine c_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine c_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine c_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ complex(psb_spk_) :: beta, x(:) -!!$ class(psb_c_multivect_gpu) :: y +!!$ class(psb_c_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_c_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine c_gpu_multi_sctb +!!$ end subroutine c_cuda_multi_sctb !!$ -!!$ subroutine c_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine c_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_spk_) :: beta, x(:) -!!$ class(psb_c_multivect_gpu) :: y +!!$ class(psb_c_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine c_gpu_multi_sctb_x +!!$ end subroutine c_cuda_multi_sctb_x - subroutine c_gpu_multi_bld_x(x,this) + subroutine c_cuda_multi_bld_x(x,this) use psb_base_mod complex(psb_spk_), intent(in) :: this(:,:) - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_multi_bld_x',& + call psb_errpush(info,'c_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine c_gpu_multi_bld_x + end subroutine c_cuda_multi_bld_x - subroutine c_gpu_multi_bld_n(x,m,n) + subroutine c_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'c_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'c_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine c_gpu_multi_bld_n + end subroutine c_cuda_multi_bld_n - subroutine c_gpu_multi_set_host(x) + subroutine c_cuda_multi_set_host(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine c_gpu_multi_set_host + end subroutine c_cuda_multi_set_host - subroutine c_gpu_multi_set_dev(x) + subroutine c_cuda_multi_set_dev(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine c_gpu_multi_set_dev + end subroutine c_cuda_multi_set_dev - subroutine c_gpu_multi_set_sync(x) + subroutine c_cuda_multi_set_sync(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine c_gpu_multi_set_sync + end subroutine c_cuda_multi_set_sync - function c_gpu_multi_is_dev(x) result(res) + function c_cuda_multi_is_dev(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function c_gpu_multi_is_dev + end function c_cuda_multi_is_dev - function c_gpu_multi_is_host(x) result(res) + function c_cuda_multi_is_host(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function c_gpu_multi_is_host + end function c_cuda_multi_is_host - function c_gpu_multi_is_sync(x) result(res) + function c_cuda_multi_is_sync(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function c_gpu_multi_is_sync + end function c_cuda_multi_is_sync - function c_gpu_multi_get_nrows(x) result(res) + function c_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function c_gpu_multi_get_nrows + end function c_cuda_multi_get_nrows - function c_gpu_multi_get_ncols(x) result(res) + function c_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function c_gpu_multi_get_ncols + end function c_cuda_multi_get_ncols - function c_gpu_multi_get_fmt() result(res) + function c_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'cGPU' - end function c_gpu_multi_get_fmt + end function c_cuda_multi_get_fmt -!!$ function c_gpu_multi_dot_v(n,x,y) result(res) +!!$ function c_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ class(psb_c_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_spk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_c_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'c_gpu_multi_dot_v') +!!$ call psb_errpush(info,'c_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function c_gpu_multi_dot_v +!!$ end function c_cuda_multi_dot_v !!$ -!!$ function c_gpu_multi_dot_a(n,x,y) result(res) +!!$ function c_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ complex(psb_spk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_spk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function c_gpu_multi_dot_a +!!$ end function c_cuda_multi_dot_a !!$ -!!$ subroutine c_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine c_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_c_base_multivect_type), intent(inout) :: x -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ complex(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine c_gpu_multi_axpby_v +!!$ end subroutine c_cuda_multi_axpby_v !!$ -!!$ subroutine c_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine c_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ complex(psb_spk_), intent(in) :: x(:) -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ complex(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine c_gpu_multi_axpby_a +!!$ end subroutine c_cuda_multi_axpby_a !!$ -!!$ subroutine c_gpu_multi_mlt_v(x, y, info) +!!$ subroutine c_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_c_base_multivect_type), intent(inout) :: x -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine c_gpu_multi_mlt_v +!!$ end subroutine c_cuda_multi_mlt_v !!$ -!!$ subroutine c_gpu_multi_mlt_a(x, y, info) +!!$ subroutine c_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: x(:) -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_c_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine c_gpu_multi_mlt_a +!!$ end subroutine c_cuda_multi_mlt_a !!$ -!!$ subroutine c_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine c_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta !!$ complex(psb_spk_), intent(in) :: x(:) !!$ complex(psb_spk_), intent(in) :: y(:) -!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ class(psb_c_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine c_gpu_multi_mlt_a_2 +!!$ end subroutine c_cuda_multi_mlt_a_2 !!$ -!!$ subroutine c_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine c_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta !!$ class(psb_c_base_multivect_type), intent(inout) :: x !!$ class(psb_c_base_multivect_type), intent(inout) :: y -!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ class(psb_c_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine c_gpu_multi_mlt_v_2 +!!$ end subroutine c_cuda_multi_mlt_v_2 - subroutine c_gpu_multi_set_scal(x,val) - class(psb_c_multivect_gpu), intent(inout) :: x + subroutine c_cuda_multi_set_scal(x,val) + class(psb_c_multivect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_c_base_multivect_type%set_scal(val) call x%set_host() - end subroutine c_gpu_multi_set_scal + end subroutine c_cuda_multi_set_scal - subroutine c_gpu_multi_set_vect(x,val) - class(psb_c_multivect_gpu), intent(inout) :: x + subroutine c_cuda_multi_set_vect(x,val) + class(psb_c_multivect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_c_base_multivect_type%set_vect(val) call x%set_host() - end subroutine c_gpu_multi_set_vect + end subroutine c_cuda_multi_set_vect -!!$ subroutine c_gpu_multi_scal(alpha, x) +!!$ subroutine c_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ complex(psb_spk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_c_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine c_gpu_multi_scal +!!$ end subroutine c_cuda_multi_scal !!$ !!$ -!!$ function c_gpu_multi_nrm2(n,x) result(res) +!!$ function c_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function c_gpu_multi_nrm2 +!!$ end function c_cuda_multi_nrm2 !!$ -!!$ function c_gpu_multi_amax(n,x) result(res) +!!$ function c_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function c_gpu_multi_amax +!!$ end function c_cuda_multi_amax !!$ -!!$ function c_gpu_multi_asum(n,x) result(res) +!!$ function c_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function c_gpu_multi_asum +!!$ end function c_cuda_multi_asum - subroutine c_gpu_multi_all(m,n, x, info) + subroutine c_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_multivect_gpu), intent(out) :: x + class(psb_c_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=czero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_multi_all',& + call psb_errpush(info,'c_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine c_gpu_multi_all + end subroutine c_cuda_multi_all - subroutine c_gpu_multi_zero(x) + subroutine c_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine c_gpu_multi_zero + end subroutine c_cuda_multi_zero - subroutine c_gpu_multi_asb(m,n, x, info) + subroutine c_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine c_gpu_multi_asb + end subroutine c_cuda_multi_asb - subroutine c_gpu_multi_sync_space(x,info) + subroutine c_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine c_gpu_multi_sync_space + end subroutine c_cuda_multi_sync_space - subroutine c_gpu_multi_sync(x) + subroutine c_cuda_multi_sync(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'c_gpu_multi_sync') + call psb_errpush(info,'c_cuda_multi_sync') end if - end subroutine c_gpu_multi_sync + end subroutine c_cuda_multi_sync - subroutine c_gpu_multi_free(x, info) + subroutine c_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine c_gpu_multi_free + end subroutine c_cuda_multi_free - subroutine c_gpu_multi_vect_finalize(x) + subroutine c_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_c_multivect_gpu), intent(inout) :: x + type(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine c_gpu_multi_vect_finalize + end subroutine c_cuda_multi_vect_finalize - subroutine c_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine c_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_spk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_c_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine c_gpu_multi_ins + end subroutine c_cuda_multi_ins #endif -end module psb_c_gpu_multivect_mod +end module psb_c_cuda_multivect_mod diff --git a/cuda/psb_gpu_env_mod.F90 b/cuda/psb_cuda_env_mod.F90 similarity index 74% rename from cuda/psb_gpu_env_mod.F90 rename to cuda/psb_cuda_env_mod.F90 index 0473f4ac..0d1d4ced 100644 --- a/cuda/psb_gpu_env_mod.F90 +++ b/cuda/psb_cuda_env_mod.F90 @@ -30,30 +30,30 @@ ! -module psb_gpu_env_mod +module psb_cuda_env_mod use psb_const_mod use iso_c_binding use base_cusparse_mod -! interface psb_gpu_init -! module procedure psb_gpu_init +! interface psb_cuda_init +! module procedure psb_cuda_init ! end interface #if defined(HAVE_CUDA) use core_mod interface - function psb_gpuGetHandle() & - & result(res) bind(c,name='psb_gpuGetHandle') + function psb_cudaGetHandle() & + & result(res) bind(c,name='psb_cudaGetHandle') use iso_c_binding type(c_ptr) :: res - end function psb_gpuGetHandle + end function psb_cudaGetHandle end interface interface - function psb_gpuGetStream() & - & result(res) bind(c,name='psb_gpuGetStream') + function psb_cudaGetStream() & + & result(res) bind(c,name='psb_cudaGetStream') use iso_c_binding type(c_ptr) :: res - end function psb_gpuGetStream + end function psb_cudaGetStream end interface interface @@ -66,11 +66,11 @@ module psb_gpu_env_mod end interface interface - function psb_cuda_getDeviceCount() & + function psb_cuda_inner_getDeviceCount() & & result(res) bind(c,name='getDeviceCount') use iso_c_binding integer(c_int) :: res - end function psb_cuda_getDeviceCount + end function psb_cuda_inner_getDeviceCount end interface interface @@ -92,39 +92,39 @@ module psb_gpu_env_mod interface - subroutine psb_gpuCreateHandle() & - & bind(c,name='psb_gpuCreateHandle') + subroutine psb_cudaCreateHandle() & + & bind(c,name='psb_cudaCreateHandle') use iso_c_binding - end subroutine psb_gpuCreateHandle + end subroutine psb_cudaCreateHandle end interface interface - subroutine psb_gpuSetStream(handle,stream) & - & bind(c,name='psb_gpuSetStream') + subroutine psb_cudaSetStream(handle,stream) & + & bind(c,name='psb_cudaSetStream') use iso_c_binding type(c_ptr), value :: handle, stream - end subroutine psb_gpuSetStream + end subroutine psb_cudaSetStream end interface interface - subroutine psb_gpuDestroyHandle() & - & bind(c,name='psb_gpuDestroyHandle') + subroutine psb_cudaDestroyHandle() & + & bind(c,name='psb_cudaDestroyHandle') use iso_c_binding - end subroutine psb_gpuDestroyHandle + end subroutine psb_cudaDestroyHandle end interface interface - subroutine psb_cudaReset() & + subroutine psb_cuda_innerReset() & & bind(c,name='cudaReset') use iso_c_binding - end subroutine psb_cudaReset + end subroutine psb_cuda_innerReset end interface interface - subroutine psb_gpuClose() & + subroutine psb_cuda_innerClose() & & bind(c,name='gpuClose') use iso_c_binding - end subroutine psb_gpuClose + end subroutine psb_cuda_innerClose end interface #endif @@ -180,15 +180,15 @@ module psb_gpu_env_mod Contains - function psb_gpu_get_maybe_free_buffer() result(res) + function psb_cuda_get_maybe_free_buffer() result(res) logical :: res res = gpu_do_maybe_free_buffer - end function psb_gpu_get_maybe_free_buffer + end function psb_cuda_get_maybe_free_buffer - subroutine psb_gpu_set_maybe_free_buffer(val) + subroutine psb_cuda_set_maybe_free_buffer(val) logical, intent(in) :: val gpu_do_maybe_free_buffer = val - end subroutine psb_gpu_set_maybe_free_buffer + end subroutine psb_cuda_set_maybe_free_buffer ! !!!!!!!!!!!!!!!!!!!!!! ! @@ -197,7 +197,7 @@ Contains ! !!!!!!!!!!!!!!!!!!!!!! - subroutine psb_gpu_init(ctxt,dev) + subroutine psb_cuda_init(ctxt,dev) use psb_penv_mod use psb_const_mod use psb_error_mod @@ -230,10 +230,10 @@ Contains end if if (info == 0) info = initFcusparse() if (info /= 0) then - call psb_errpush(psb_err_internal_error_,'psb_gpu_init') + call psb_errpush(psb_err_internal_error_,'psb_cuda_init') goto 9999 end if - call psb_gpuCreateHandle() + call psb_cudaCreateHandle() #endif call psb_erractionrestore(err_act) return @@ -241,80 +241,80 @@ Contains return - end subroutine psb_gpu_init + end subroutine psb_cuda_init - subroutine psb_gpu_DeviceSync() + subroutine psb_cuda_DeviceSync() #if defined(HAVE_CUDA) call psb_cudaSync() #endif - end subroutine psb_gpu_DeviceSync + end subroutine psb_cuda_DeviceSync - function psb_gpu_getDeviceCount() result(res) + function psb_cuda_getDeviceCount() result(res) integer :: res #if defined(HAVE_CUDA) - res = psb_cuda_getDeviceCount() + res = psb_cuda_inner_getDeviceCount() #else res = 0 #endif - end function psb_gpu_getDeviceCount + end function psb_cuda_getDeviceCount - subroutine psb_gpu_exit() + subroutine psb_cuda_exit() integer :: res res = closeFcusparse() - call psb_gpuClose() - call psb_cudaReset() - end subroutine psb_gpu_exit + call psb_cuda_innerClose() + call psb_cuda_innerReset() + end subroutine psb_cuda_exit - function psb_gpu_DeviceHasUVA() result(res) + function psb_cuda_DeviceHasUVA() result(res) logical :: res res = (psb_C_DeviceHasUVA() == 1) - end function psb_gpu_DeviceHasUVA + end function psb_cuda_DeviceHasUVA - function psb_gpu_MultiProcessors() result(res) + function psb_cuda_MultiProcessors() result(res) integer(psb_ipk_) :: res res = psb_C_get_MultiProcessors() - end function psb_gpu_MultiProcessors + end function psb_cuda_MultiProcessors - function psb_gpu_MaxRegistersPerBlock() result(res) + function psb_cuda_MaxRegistersPerBlock() result(res) integer(psb_ipk_) :: res res = psb_C_get_MaxRegistersPerBlock() - end function psb_gpu_MaxRegistersPerBlock + end function psb_cuda_MaxRegistersPerBlock - function psb_gpu_MaxThreadsPerMP() result(res) + function psb_cuda_MaxThreadsPerMP() result(res) integer(psb_ipk_) :: res res = psb_C_get_MaxThreadsPerMP() - end function psb_gpu_MaxThreadsPerMP + end function psb_cuda_MaxThreadsPerMP - function psb_gpu_WarpSize() result(res) + function psb_cuda_WarpSize() result(res) integer(psb_ipk_) :: res res = psb_C_get_WarpSize() - end function psb_gpu_WarpSize + end function psb_cuda_WarpSize - function psb_gpu_MemoryClockRate() result(res) + function psb_cuda_MemoryClockRate() result(res) integer(psb_ipk_) :: res res = psb_C_get_MemoryClockRate() - end function psb_gpu_MemoryClockRate + end function psb_cuda_MemoryClockRate - function psb_gpu_MemoryBusWidth() result(res) + function psb_cuda_MemoryBusWidth() result(res) integer(psb_ipk_) :: res res = psb_C_get_MemoryBusWidth() - end function psb_gpu_MemoryBusWidth + end function psb_cuda_MemoryBusWidth - function psb_gpu_MemoryPeakBandwidth() result(res) + function psb_cuda_MemoryPeakBandwidth() result(res) real(psb_dpk_) :: res ! Formula here: 2*ClockRate(KHz)*BusWidth(bit) ! normalization: bit/byte, KHz/MHz ! output: MBytes/s res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate() - end function psb_gpu_MemoryPeakBandwidth + end function psb_cuda_MemoryPeakBandwidth - function psb_gpu_DeviceName() result(res) + function psb_cuda_DeviceName() result(res) character(len=256) :: res character :: cstring(256) call psb_C_cpy_NameString(cstring) call stringc2f(cstring,res) - end function psb_gpu_DeviceName + end function psb_cuda_DeviceName subroutine stringc2f(cstring,fstring) @@ -337,4 +337,4 @@ Contains return end subroutine stringc2f -end module psb_gpu_env_mod +end module psb_cuda_env_mod diff --git a/cuda/psb_gpu_mod.F90 b/cuda/psb_cuda_mod.F90 similarity index 65% rename from cuda/psb_gpu_mod.F90 rename to cuda/psb_cuda_mod.F90 index 7eba8062..81ce3e31 100644 --- a/cuda/psb_gpu_mod.F90 +++ b/cuda/psb_cuda_mod.F90 @@ -30,60 +30,60 @@ ! -module psb_gpu_mod +module psb_cuda_mod use psb_const_mod - use psb_gpu_env_mod + use psb_cuda_env_mod - use psb_i_gpu_vect_mod - use psb_s_gpu_vect_mod - use psb_d_gpu_vect_mod - use psb_c_gpu_vect_mod - use psb_z_gpu_vect_mod + use psb_i_cuda_vect_mod + use psb_s_cuda_vect_mod + use psb_d_cuda_vect_mod + use psb_c_cuda_vect_mod + use psb_z_cuda_vect_mod - use psb_i_gpu_multivect_mod - use psb_s_gpu_multivect_mod - use psb_d_gpu_multivect_mod - use psb_c_gpu_multivect_mod - use psb_z_gpu_multivect_mod + use psb_i_cuda_multivect_mod + use psb_s_cuda_multivect_mod + use psb_d_cuda_multivect_mod + use psb_c_cuda_multivect_mod + use psb_z_cuda_multivect_mod use psb_d_ell_mat_mod - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod use psb_s_ell_mat_mod - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod use psb_z_ell_mat_mod - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod use psb_c_ell_mat_mod - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod use psb_s_hll_mat_mod - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod use psb_d_hll_mat_mod - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod use psb_c_hll_mat_mod - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod use psb_z_hll_mat_mod - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod - use psb_s_csrg_mat_mod - use psb_d_csrg_mat_mod - use psb_c_csrg_mat_mod - use psb_z_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #if CUDA_SHORT_VERSION <= 10 - use psb_s_hybg_mat_mod - use psb_d_hybg_mat_mod - use psb_c_hybg_mat_mod - use psb_z_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif - use psb_d_diag_mat_mod - use psb_d_hdiag_mat_mod + use psb_d_cuda_diag_mat_mod + use psb_d_cuda_hdiag_mat_mod - use psb_s_dnsg_mat_mod - use psb_d_dnsg_mat_mod - use psb_c_dnsg_mat_mod - use psb_z_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod - use psb_s_hdiag_mat_mod - ! use psb_s_diag_mat_mod + use psb_s_cuda_hdiag_mat_mod + ! use psb_s_cuda_diag_mat_mod -end module psb_gpu_mod +end module psb_cuda_mod diff --git a/cuda/psb_d_csrg_mat_mod.F90 b/cuda/psb_d_csrg_mat_mod.F90 deleted file mode 100644 index 177c7440..00000000 --- a/cuda/psb_d_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_d_csrg_mat_mod - - use iso_c_binding - use psb_d_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_d_csr_sparse_mat) :: psb_d_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(d_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => d_csrg_get_fmt - procedure, pass(a) :: sizeof => d_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_d_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_csrg_csmm - procedure, pass(a) :: csmv => psb_d_csrg_csmv - procedure, pass(a) :: scals => psb_d_csrg_scals - procedure, pass(a) :: scalv => psb_d_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_d_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_csrg_from_fmt - procedure, pass(a) :: free => d_csrg_free - procedure, pass(a) :: mold => psb_d_csrg_mold - procedure, pass(a) :: is_host => d_csrg_is_host - procedure, pass(a) :: is_dev => d_csrg_is_dev - procedure, pass(a) :: is_sync => d_csrg_is_sync - procedure, pass(a) :: set_host => d_csrg_set_host - procedure, pass(a) :: set_dev => d_csrg_set_dev - procedure, pass(a) :: set_sync => d_csrg_set_sync - procedure, pass(a) :: sync => d_csrg_sync - procedure, pass(a) :: to_gpu => psb_d_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_d_csrg_from_gpu - final :: d_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_csrg_mold -#endif - end type psb_d_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: d_csrg_get_nzeros, d_csrg_free, d_csrg_get_fmt, & - & d_csrg_get_size, d_csrg_sizeof, d_csrg_get_nz_row - - - interface - subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_vect_mv - end interface - - interface - subroutine psb_d_csrg_reallocate_nz(nz,a) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_d_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_d_csrg_reallocate_nz - end interface - - interface - subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_d_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_csrg_allocate_mnnz - end interface - - interface - subroutine psb_d_csrg_mold(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_mold - end interface - - interface - subroutine psb_d_csrg_to_gpu(a,info, nzrm) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_csrg_to_gpu - end interface - - interface - subroutine psb_d_csrg_from_gpu(a,info) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_from_gpu - end interface - - interface - subroutine psb_d_cp_csrg_from_coo(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_csrg_from_coo - end interface - - interface - subroutine psb_d_cp_csrg_from_fmt(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_csrg_from_fmt - end interface - - interface - subroutine psb_d_mv_csrg_from_coo(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_csrg_from_coo - end interface - - interface - subroutine psb_d_mv_csrg_from_fmt(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_csrg_from_fmt - end interface - - interface - subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_csmv - end interface - interface - subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_csmm - end interface - - interface - subroutine psb_d_csrg_scal(d,a,info,side) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_d_csrg_scal - end interface - - interface - subroutine psb_d_csrg_scals(d,a,info) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function d_csrg_sizeof(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + psb_sizeof_dp * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function d_csrg_sizeof - - function d_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function d_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine d_csrg_set_host(a) - implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine d_csrg_set_host - - subroutine d_csrg_set_dev(a) - implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine d_csrg_set_dev - - subroutine d_csrg_set_sync(a) - implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine d_csrg_set_sync - - function d_csrg_is_dev(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function d_csrg_is_dev - - function d_csrg_is_host(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function d_csrg_is_host - - function d_csrg_is_sync(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function d_csrg_is_sync - - - subroutine d_csrg_sync(a) - implicit none - class(psb_d_csrg_sparse_mat), target, intent(in) :: a - class(psb_d_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine d_csrg_sync - - subroutine d_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_d_csr_sparse_mat%free() - - return - - end subroutine d_csrg_free - - subroutine d_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_d_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine d_csrg_finalize - -#else - interface - subroutine psb_d_csrg_mold(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_mold - end interface - -#endif - -end module psb_d_csrg_mat_mod diff --git a/cuda/psb_d_cuda_csrg_mat_mod.F90 b/cuda/psb_d_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..465c16a7 --- /dev/null +++ b/cuda/psb_d_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_csrg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(d_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_d_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_d_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => d_cuda_csrg_free + procedure, pass(a) :: mold => psb_d_cuda_csrg_mold + procedure, pass(a) :: is_host => d_cuda_csrg_is_host + procedure, pass(a) :: is_dev => d_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => d_cuda_csrg_is_sync + procedure, pass(a) :: set_host => d_cuda_csrg_set_host + procedure, pass(a) :: set_dev => d_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => d_cuda_csrg_set_sync + procedure, pass(a) :: sync => d_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_d_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_d_cuda_csrg_from_gpu + final :: d_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_cuda_csrg_mold +#endif + end type psb_d_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_cuda_csrg_get_nzeros, d_cuda_csrg_free, d_cuda_csrg_get_fmt, & + & d_cuda_csrg_get_size, d_cuda_csrg_sizeof, d_cuda_csrg_get_nz_row + + + interface + subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_d_cuda_csrg_mold(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_mold + end interface + + interface + subroutine psb_d_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_d_cuda_csrg_from_gpu(a,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_csmv + end interface + interface + subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_csmm + end interface + + interface + subroutine psb_d_cuda_csrg_scal(d,a,info,side) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_cuda_csrg_scal + end interface + + interface + subroutine psb_d_cuda_csrg_scals(d,a,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_cuda_csrg_sizeof + + function d_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function d_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine d_cuda_csrg_set_host(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_cuda_csrg_set_host + + subroutine d_cuda_csrg_set_dev(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_cuda_csrg_set_dev + + subroutine d_cuda_csrg_set_sync(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_cuda_csrg_set_sync + + function d_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_cuda_csrg_is_dev + + function d_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_cuda_csrg_is_host + + function d_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_cuda_csrg_is_sync + + + subroutine d_cuda_csrg_sync(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_cuda_csrg_sync + + subroutine d_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_cuda_csrg_free + + subroutine d_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine d_cuda_csrg_finalize + +#else + interface + subroutine psb_d_cuda_csrg_mold(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_mold + end interface + +#endif + +end module psb_d_cuda_csrg_mat_mod diff --git a/cuda/psb_d_diag_mat_mod.F90 b/cuda/psb_d_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_d_diag_mat_mod.F90 rename to cuda/psb_d_cuda_diag_mat_mod.F90 index 564f7a13..1d55faa0 100644 --- a/cuda/psb_d_diag_mat_mod.F90 +++ b/cuda/psb_d_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_d_diag_mat_mod +module psb_d_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_d_dia_mat_mod - type, extends(psb_d_dia_sparse_mat) :: psb_d_diag_sparse_mat + type, extends(psb_d_dia_sparse_mat) :: psb_d_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_d_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => d_diag_get_fmt - procedure, pass(a) :: sizeof => d_diag_sizeof - procedure, pass(a) :: vect_mv => psb_d_diag_vect_mv -! procedure, pass(a) :: csmm => psb_d_diag_csmm - procedure, pass(a) :: csmv => psb_d_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_d_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_d_diag_scals -! procedure, pass(a) :: scalv => psb_d_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_d_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_d_diag_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => d_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_d_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_d_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_d_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_d_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_d_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_d_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_d_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_d_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_d_mv_diag_from_fmt - procedure, pass(a) :: free => d_diag_free - procedure, pass(a) :: mold => psb_d_diag_mold - procedure, pass(a) :: to_gpu => psb_d_diag_to_gpu - final :: d_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => d_cuda_diag_free + procedure, pass(a) :: mold => psb_d_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_diag_to_gpu + final :: d_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_d_diag_mold + procedure, pass(a) :: mold => psb_d_cuda_diag_mold #endif - end type psb_d_diag_sparse_mat + end type psb_d_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: d_diag_get_nzeros, d_diag_free, d_diag_get_fmt, & - & d_diag_get_size, d_diag_sizeof, d_diag_get_nz_row + private :: d_cuda_diag_get_nzeros, d_cuda_diag_free, d_cuda_diag_get_fmt, & + & d_cuda_diag_get_size, d_cuda_diag_sizeof, d_cuda_diag_get_nz_row interface - subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_vect_mv + end subroutine psb_d_cuda_diag_vect_mv end interface interface - subroutine psb_d_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_inner_vect_sv + end subroutine psb_d_cuda_diag_inner_vect_sv end interface interface - subroutine psb_d_diag_reallocate_nz(nz,a) - import :: psb_d_diag_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_diag_reallocate_nz(nz,a) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_diag_sparse_mat), intent(inout) :: a - end subroutine psb_d_diag_reallocate_nz + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_diag_reallocate_nz end interface interface - subroutine psb_d_diag_allocate_mnnz(m,n,a,nz) - import :: psb_d_diag_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_diag_allocate_mnnz + end subroutine psb_d_cuda_diag_allocate_mnnz end interface interface - subroutine psb_d_diag_mold(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_mold(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_diag_mold + end subroutine psb_d_cuda_diag_mold end interface interface - subroutine psb_d_diag_to_gpu(a,info, nzrm) - import :: psb_d_diag_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_diag_to_gpu + end subroutine psb_d_cuda_diag_to_gpu end interface interface - subroutine psb_d_cp_diag_from_coo(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_diag_from_coo + end subroutine psb_d_cuda_cp_diag_from_coo end interface interface - subroutine psb_d_cp_diag_from_fmt(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_diag_from_fmt + end subroutine psb_d_cuda_cp_diag_from_fmt end interface interface - subroutine psb_d_mv_diag_from_coo(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_diag_from_coo + end subroutine psb_d_cuda_mv_diag_from_coo end interface interface - subroutine psb_d_mv_diag_from_fmt(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_diag_from_fmt + end subroutine psb_d_cuda_mv_diag_from_fmt end interface interface - subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_csmv + end subroutine psb_d_cuda_diag_csmv end interface interface - subroutine psb_d_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_csmm + end subroutine psb_d_cuda_diag_csmm end interface interface - subroutine psb_d_diag_scal(d,a,info, side) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_diag_scal(d,a,info, side) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_diag_scal + end subroutine psb_d_cuda_diag_scal end interface interface - subroutine psb_d_diag_scals(d,a,info) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_diag_scals(d,a,info) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_diag_scals + end subroutine psb_d_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function d_diag_sizeof(a) result(res) + function d_cuda_diag_sizeof(a) result(res) implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_diag_sizeof + end function d_cuda_diag_sizeof - function d_diag_get_fmt() result(res) + function d_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function d_diag_get_fmt + end function d_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine d_diag_free(a) + subroutine d_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine d_diag_free + end subroutine d_cuda_diag_free - subroutine d_diag_finalize(a) + subroutine d_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_d_diag_sparse_mat), intent(inout) :: a + type(psb_d_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_diag_finalize + end subroutine d_cuda_diag_finalize #else interface - subroutine psb_d_diag_mold(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_mold(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_diag_mold + end subroutine psb_d_cuda_diag_mold end interface #endif -end module psb_d_diag_mat_mod +end module psb_d_cuda_diag_mat_mod diff --git a/cuda/psb_d_dnsg_mat_mod.F90 b/cuda/psb_d_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_d_dnsg_mat_mod.F90 rename to cuda/psb_d_cuda_dnsg_mat_mod.F90 index 966c2311..bb24eb1a 100644 --- a/cuda/psb_d_dnsg_mat_mod.F90 +++ b/cuda/psb_d_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_d_dnsg_mat_mod +module psb_d_cuda_dnsg_mat_mod use iso_c_binding use psb_d_mat_mod use psb_d_dns_mat_mod use dnsdev_mod - type, extends(psb_d_dns_sparse_mat) :: psb_d_dnsg_sparse_mat + type, extends(psb_d_dns_sparse_mat) :: psb_d_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_d_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => d_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => d_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_d_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_d_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_d_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_d_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_d_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_d_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_d_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_d_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => d_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_d_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_d_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_d_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_d_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_d_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_d_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_d_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_d_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_dnsg_from_fmt - procedure, pass(a) :: free => d_dnsg_free - procedure, pass(a) :: mold => psb_d_dnsg_mold - procedure, pass(a) :: to_gpu => psb_d_dnsg_to_gpu - final :: d_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => d_cuda_dnsg_free + procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_dnsg_to_gpu + final :: d_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_d_dnsg_mold + procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold #endif - end type psb_d_dnsg_sparse_mat + end type psb_d_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: d_dnsg_get_nzeros, d_dnsg_free, d_dnsg_get_fmt, & - & d_dnsg_get_size, d_dnsg_get_nz_row + private :: d_cuda_dnsg_get_nzeros, d_cuda_dnsg_free, d_cuda_dnsg_get_fmt, & + & d_cuda_dnsg_get_size, d_cuda_dnsg_get_nz_row interface - subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_dnsg_vect_mv + end subroutine psb_d_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_d_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end subroutine psb_d_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_d_dnsg_mold(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_dnsg_mold(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_dnsg_mold + end subroutine psb_d_cuda_dnsg_mold end interface interface - subroutine psb_d_dnsg_to_gpu(a,info) - import :: psb_d_dnsg_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_dnsg_to_gpu(a,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_dnsg_to_gpu + end subroutine psb_d_cuda_dnsg_to_gpu end interface interface - subroutine psb_d_cp_dnsg_from_coo(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_dnsg_from_coo + end subroutine psb_d_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_d_cp_dnsg_from_fmt(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_dnsg_from_fmt + end subroutine psb_d_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_d_mv_dnsg_from_coo(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_dnsg_from_coo + end subroutine psb_d_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_d_mv_dnsg_from_fmt(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_dnsg_from_fmt + end subroutine psb_d_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_d_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) !!$ real(psb_dpk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_csmv +!!$ end subroutine psb_d_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_d_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_csmm +!!$ end subroutine psb_d_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_scal(d,a,info, side) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_d_dnsg_scal +!!$ end subroutine psb_d_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_scals(d,a,info) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_dnsg_scals +!!$ end subroutine psb_d_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function d_dnsg_get_fmt() result(res) + function d_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function d_dnsg_get_fmt + end function d_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine d_dnsg_free(a) + subroutine d_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine d_dnsg_free + end subroutine d_cuda_dnsg_free - subroutine d_dnsg_finalize(a) + subroutine d_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_d_dnsg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_dnsg_finalize + end subroutine d_cuda_dnsg_finalize #else interface - subroutine psb_d_dnsg_mold(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_dnsg_mold(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_dnsg_mold + end subroutine psb_d_cuda_dnsg_mold end interface #endif -end module psb_d_dnsg_mat_mod +end module psb_d_cuda_dnsg_mat_mod diff --git a/cuda/psb_d_elg_mat_mod.F90 b/cuda/psb_d_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_d_elg_mat_mod.F90 rename to cuda/psb_d_cuda_elg_mat_mod.F90 index eac7bb36..1ac47664 100644 --- a/cuda/psb_d_elg_mat_mod.F90 +++ b/cuda/psb_d_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_d_elg_mat_mod +module psb_d_cuda_elg_mat_mod use iso_c_binding use psb_d_mat_mod use psb_d_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_ell_sparse_mat) :: psb_d_elg_sparse_mat + type, extends(psb_d_ell_sparse_mat) :: psb_d_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_d_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => d_elg_get_fmt - procedure, pass(a) :: sizeof => d_elg_sizeof - procedure, pass(a) :: vect_mv => psb_d_elg_vect_mv - procedure, pass(a) :: csmm => psb_d_elg_csmm - procedure, pass(a) :: csmv => psb_d_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_d_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_d_elg_scals - procedure, pass(a) :: scalv => psb_d_elg_scal - procedure, pass(a) :: reallocate_nz => psb_d_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_elg_allocate_mnnz - procedure, pass(a) :: reinit => d_elg_reinit + procedure, nopass :: get_fmt => d_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_d_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_cuda_elg_scals + procedure, pass(a) :: scalv => psb_d_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => d_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_elg_from_fmt - procedure, pass(a) :: free => d_elg_free - procedure, pass(a) :: mold => psb_d_elg_mold - procedure, pass(a) :: csput_a => psb_d_elg_csput_a - procedure, pass(a) :: csput_v => psb_d_elg_csput_v - procedure, pass(a) :: is_host => d_elg_is_host - procedure, pass(a) :: is_dev => d_elg_is_dev - procedure, pass(a) :: is_sync => d_elg_is_sync - procedure, pass(a) :: set_host => d_elg_set_host - procedure, pass(a) :: set_dev => d_elg_set_dev - procedure, pass(a) :: set_sync => d_elg_set_sync - procedure, pass(a) :: sync => d_elg_sync - procedure, pass(a) :: from_gpu => psb_d_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_d_elg_to_gpu - procedure, pass(a) :: asb => psb_d_elg_asb - final :: d_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => d_cuda_elg_free + procedure, pass(a) :: mold => psb_d_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_d_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_d_cuda_elg_csput_v + procedure, pass(a) :: is_host => d_cuda_elg_is_host + procedure, pass(a) :: is_dev => d_cuda_elg_is_dev + procedure, pass(a) :: is_sync => d_cuda_elg_is_sync + procedure, pass(a) :: set_host => d_cuda_elg_set_host + procedure, pass(a) :: set_dev => d_cuda_elg_set_dev + procedure, pass(a) :: set_sync => d_cuda_elg_set_sync + procedure, pass(a) :: sync => d_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_d_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_d_cuda_elg_asb + final :: d_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_d_elg_mold - procedure, pass(a) :: asb => psb_d_elg_asb + procedure, pass(a) :: mold => psb_d_cuda_elg_mold + procedure, pass(a) :: asb => psb_d_cuda_elg_asb #endif - end type psb_d_elg_sparse_mat + end type psb_d_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: d_elg_get_nzeros, d_elg_free, d_elg_get_fmt, & - & d_elg_get_size, d_elg_sizeof, d_elg_get_nz_row, d_elg_sync + private :: d_cuda_elg_get_nzeros, d_cuda_elg_free, d_cuda_elg_get_fmt, & + & d_cuda_elg_get_size, d_cuda_elg_sizeof, d_cuda_elg_get_nz_row, d_cuda_elg_sync interface - subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_vect_mv + end subroutine psb_d_cuda_elg_vect_mv end interface interface - subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_inner_vect_sv + end subroutine psb_d_cuda_elg_inner_vect_sv end interface interface - subroutine psb_d_elg_reallocate_nz(nz,a) - import :: psb_d_elg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_elg_reallocate_nz(nz,a) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_elg_reallocate_nz + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_reallocate_nz end interface interface - subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) - import :: psb_d_elg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_elg_allocate_mnnz + end subroutine psb_d_cuda_elg_allocate_mnnz end interface interface - subroutine psb_d_elg_mold(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_mold(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_mold + end subroutine psb_d_cuda_elg_mold end interface interface - subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_csput_a + end subroutine psb_d_cuda_elg_csput_a end interface interface - subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,& + subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,& & psb_i_base_vect_type - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_csput_v + end subroutine psb_d_cuda_elg_csput_v end interface interface - subroutine psb_d_elg_from_gpu(a,info) - import :: psb_d_elg_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_from_gpu(a,info) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_from_gpu + end subroutine psb_d_cuda_elg_from_gpu end interface interface - subroutine psb_d_elg_to_gpu(a,info, nzrm) - import :: psb_d_elg_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_elg_to_gpu + end subroutine psb_d_cuda_elg_to_gpu end interface interface - subroutine psb_d_cp_elg_from_coo(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_elg_from_coo + end subroutine psb_d_cuda_cp_elg_from_coo end interface interface - subroutine psb_d_cp_elg_from_fmt(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_elg_from_fmt + end subroutine psb_d_cuda_cp_elg_from_fmt end interface interface - subroutine psb_d_mv_elg_from_coo(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_elg_from_coo + end subroutine psb_d_cuda_mv_elg_from_coo end interface interface - subroutine psb_d_mv_elg_from_fmt(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_elg_from_fmt + end subroutine psb_d_cuda_mv_elg_from_fmt end interface interface - subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_csmv + end subroutine psb_d_cuda_elg_csmv end interface interface - subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_csmm + end subroutine psb_d_cuda_elg_csmm end interface interface - subroutine psb_d_elg_scal(d,a,info, side) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_scal(d,a,info, side) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_elg_scal + end subroutine psb_d_cuda_elg_scal end interface interface - subroutine psb_d_elg_scals(d,a,info) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_scals(d,a,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_scals + end subroutine psb_d_cuda_elg_scals end interface interface - subroutine psb_d_elg_asb(a) - import :: psb_d_elg_sparse_mat - class(psb_d_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_elg_asb + subroutine psb_d_cuda_elg_asb(a) + import :: psb_d_cuda_elg_sparse_mat + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function d_elg_sizeof(a) result(res) + function d_cuda_elg_sizeof(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_elg_sizeof + end function d_cuda_elg_sizeof - function d_elg_get_fmt() result(res) + function d_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function d_elg_get_fmt + end function d_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine d_elg_reinit(a,clear) + subroutine d_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine d_elg_reinit + end subroutine d_cuda_elg_reinit - subroutine d_elg_free(a) + subroutine d_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine d_elg_free + end subroutine d_cuda_elg_free - subroutine d_elg_sync(a) + subroutine d_cuda_elg_sync(a) implicit none - class(psb_d_elg_sparse_mat), target, intent(in) :: a - class(psb_d_elg_sparse_mat), pointer :: tmpa + class(psb_d_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine d_elg_sync + end subroutine d_cuda_elg_sync - subroutine d_elg_set_host(a) + subroutine d_cuda_elg_set_host(a) implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine d_elg_set_host + end subroutine d_cuda_elg_set_host - subroutine d_elg_set_dev(a) + subroutine d_cuda_elg_set_dev(a) implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine d_elg_set_dev + end subroutine d_cuda_elg_set_dev - subroutine d_elg_set_sync(a) + subroutine d_cuda_elg_set_sync(a) implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine d_elg_set_sync + end subroutine d_cuda_elg_set_sync - function d_elg_is_dev(a) result(res) + function d_cuda_elg_is_dev(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function d_elg_is_dev + end function d_cuda_elg_is_dev - function d_elg_is_host(a) result(res) + function d_cuda_elg_is_host(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function d_elg_is_host + end function d_cuda_elg_is_host - function d_elg_is_sync(a) result(res) + function d_cuda_elg_is_sync(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function d_elg_is_sync + end function d_cuda_elg_is_sync - subroutine d_elg_finalize(a) + subroutine d_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_d_elg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_elg_finalize + end subroutine d_cuda_elg_finalize #else interface - subroutine psb_d_elg_asb(a) - import :: psb_d_elg_sparse_mat - class(psb_d_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_elg_asb + subroutine psb_d_cuda_elg_asb(a) + import :: psb_d_cuda_elg_sparse_mat + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_asb end interface interface - subroutine psb_d_elg_mold(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_mold(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_mold + end subroutine psb_d_cuda_elg_mold end interface #endif -end module psb_d_elg_mat_mod +end module psb_d_cuda_elg_mat_mod diff --git a/cuda/psb_d_hdiag_mat_mod.F90 b/cuda/psb_d_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_d_hdiag_mat_mod.F90 rename to cuda/psb_d_cuda_hdiag_mat_mod.F90 index 1bc70c8c..17bacffe 100644 --- a/cuda/psb_d_hdiag_mat_mod.F90 +++ b/cuda/psb_d_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_d_hdiag_mat_mod +module psb_d_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_d_hdia_mat_mod - type, extends(psb_d_hdia_sparse_mat) :: psb_d_hdiag_sparse_mat + type, extends(psb_d_hdia_sparse_mat) :: psb_d_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => d_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => d_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_d_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_d_hdiag_csmm - procedure, pass(a) :: csmv => psb_d_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_d_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_d_hdiag_scals - ! procedure, pass(a) :: scalv => psb_d_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_d_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_d_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => d_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_d_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_d_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_d_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_d_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_d_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_hdiag_from_fmt - procedure, pass(a) :: free => d_hdiag_free - procedure, pass(a) :: mold => psb_d_hdiag_mold - procedure, pass(a) :: to_gpu => psb_d_hdiag_to_gpu - final :: d_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => d_cuda_hdiag_free + procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_hdiag_to_gpu + final :: d_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_d_hdiag_mold + procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold #endif - end type psb_d_hdiag_sparse_mat + end type psb_d_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: d_hdiag_get_nzeros, d_hdiag_free, d_hdiag_get_fmt, & - & d_hdiag_get_size, d_hdiag_sizeof, d_hdiag_get_nz_row + private :: d_cuda_hdiag_get_nzeros, d_cuda_hdiag_free, d_cuda_hdiag_get_fmt, & + & d_cuda_hdiag_get_size, d_cuda_hdiag_sizeof, d_cuda_hdiag_get_nz_row interface - subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hdiag_vect_mv + end subroutine psb_d_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_d_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type -!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_d_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_hdiag_inner_vect_sv +!!$ end subroutine psb_d_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_d_hdiag_reallocate_nz +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_d_hdiag_allocate_mnnz +!!$ end subroutine psb_d_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_d_hdiag_mold(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_mold(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hdiag_mold + end subroutine psb_d_cuda_hdiag_mold end interface interface - subroutine psb_d_hdiag_to_gpu(a,info) - import :: psb_d_hdiag_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hdiag_to_gpu(a,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hdiag_to_gpu + end subroutine psb_d_cuda_hdiag_to_gpu end interface interface - subroutine psb_d_cp_hdiag_from_coo(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hdiag_from_coo + end subroutine psb_d_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_d_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_d_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_cp_hdiag_from_fmt +!!$ end subroutine psb_d_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_d_mv_hdiag_from_coo(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hdiag_from_coo + end subroutine psb_d_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_d_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_d_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_mv_hdiag_from_fmt +!!$ end subroutine psb_d_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hdiag_csmv + end subroutine psb_d_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_d_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_hdiag_csmm +!!$ end subroutine psb_d_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_scal(d,a,info, side) -!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_d_hdiag_scal +!!$ end subroutine psb_d_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_scals(d,a,info) -!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_hdiag_scals +!!$ end subroutine psb_d_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function d_hdiag_get_fmt() result(res) + function d_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function d_hdiag_get_fmt + end function d_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine d_hdiag_free(a) + subroutine d_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine d_hdiag_free + end subroutine d_cuda_hdiag_free - subroutine d_hdiag_finalize(a) + subroutine d_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_d_hdiag_sparse_mat), intent(inout) :: a + type(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_d_hdia_sparse_mat%free() return - end subroutine d_hdiag_finalize + end subroutine d_cuda_hdiag_finalize #else interface - subroutine psb_d_hdiag_mold(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_mold(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hdiag_mold + end subroutine psb_d_cuda_hdiag_mold end interface #endif -end module psb_d_hdiag_mat_mod +end module psb_d_cuda_hdiag_mat_mod diff --git a/cuda/psb_d_hlg_mat_mod.F90 b/cuda/psb_d_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_d_hlg_mat_mod.F90 rename to cuda/psb_d_cuda_hlg_mat_mod.F90 index 756d13aa..19ecb62b 100644 --- a/cuda/psb_d_hlg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_d_hlg_mat_mod +module psb_d_cuda_hlg_mat_mod use iso_c_binding use psb_d_mat_mod @@ -41,7 +41,7 @@ module psb_d_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_hll_sparse_mat) :: psb_d_hlg_sparse_mat + type, extends(psb_d_hll_sparse_mat) :: psb_d_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_d_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => d_hlg_get_fmt - procedure, pass(a) :: sizeof => d_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_d_hlg_vect_mv - procedure, pass(a) :: csmm => psb_d_hlg_csmm - procedure, pass(a) :: csmv => psb_d_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_d_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_d_hlg_scals - procedure, pass(a) :: scalv => psb_d_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_d_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_hlg_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_d_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_d_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_hlg_from_fmt - procedure, pass(a) :: free => d_hlg_free - procedure, pass(a) :: mold => psb_d_hlg_mold - procedure, pass(a) :: is_host => d_hlg_is_host - procedure, pass(a) :: is_dev => d_hlg_is_dev - procedure, pass(a) :: is_sync => d_hlg_is_sync - procedure, pass(a) :: set_host => d_hlg_set_host - procedure, pass(a) :: set_dev => d_hlg_set_dev - procedure, pass(a) :: set_sync => d_hlg_set_sync - procedure, pass(a) :: sync => d_hlg_sync - procedure, pass(a) :: from_gpu => psb_d_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_d_hlg_to_gpu - final :: d_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => d_cuda_hlg_free + procedure, pass(a) :: mold => psb_d_cuda_hlg_mold + procedure, pass(a) :: is_host => d_cuda_hlg_is_host + procedure, pass(a) :: is_dev => d_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => d_cuda_hlg_is_sync + procedure, pass(a) :: set_host => d_cuda_hlg_set_host + procedure, pass(a) :: set_dev => d_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => d_cuda_hlg_set_sync + procedure, pass(a) :: sync => d_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_d_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_cuda_hlg_to_gpu + final :: d_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_d_hlg_mold + procedure, pass(a) :: mold => psb_d_cuda_hlg_mold #endif - end type psb_d_hlg_sparse_mat + end type psb_d_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: d_hlg_get_nzeros, d_hlg_free, d_hlg_get_fmt, & - & d_hlg_get_size, d_hlg_sizeof, d_hlg_get_nz_row + private :: d_cuda_hlg_get_nzeros, d_cuda_hlg_free, d_cuda_hlg_get_fmt, & + & d_cuda_hlg_get_size, d_cuda_hlg_sizeof, d_cuda_hlg_get_nz_row interface - subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_vect_mv + end subroutine psb_d_cuda_hlg_vect_mv end interface interface - subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_inner_vect_sv + end subroutine psb_d_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_d_hlg_reallocate_nz(nz,a) - import :: psb_d_hlg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_d_hlg_reallocate_nz + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_hlg_reallocate_nz end interface interface - subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_d_hlg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_hlg_allocate_mnnz + end subroutine psb_d_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_d_hlg_mold(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_mold(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_mold + end subroutine psb_d_cuda_hlg_mold end interface interface - subroutine psb_d_hlg_from_gpu(a,info) - import :: psb_d_hlg_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_from_gpu(a,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_from_gpu + end subroutine psb_d_cuda_hlg_from_gpu end interface interface - subroutine psb_d_hlg_to_gpu(a,info, nzrm) - import :: psb_d_hlg_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_hlg_to_gpu + end subroutine psb_d_cuda_hlg_to_gpu end interface interface - subroutine psb_d_cp_hlg_from_coo(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hlg_from_coo + end subroutine psb_d_cuda_cp_hlg_from_coo end interface interface - subroutine psb_d_cp_hlg_from_fmt(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hlg_from_fmt + end subroutine psb_d_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_d_mv_hlg_from_coo(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hlg_from_coo + end subroutine psb_d_cuda_mv_hlg_from_coo end interface interface - subroutine psb_d_mv_hlg_from_fmt(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hlg_from_fmt + end subroutine psb_d_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_csmv + end subroutine psb_d_cuda_hlg_csmv end interface interface - subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_csmm + end subroutine psb_d_cuda_hlg_csmm end interface interface - subroutine psb_d_hlg_scal(d,a,info, side) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_scal(d,a,info, side) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_hlg_scal + end subroutine psb_d_cuda_hlg_scal end interface interface - subroutine psb_d_hlg_scals(d,a,info) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_scals(d,a,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_scals + end subroutine psb_d_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function d_hlg_sizeof(a) result(res) + function d_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_hlg_sizeof + end function d_cuda_hlg_sizeof - function d_hlg_get_fmt() result(res) + function d_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function d_hlg_get_fmt + end function d_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine d_hlg_free(a) + subroutine d_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine d_hlg_free + end subroutine d_cuda_hlg_free - subroutine d_hlg_sync(a) + subroutine d_cuda_hlg_sync(a) implicit none - class(psb_d_hlg_sparse_mat), target, intent(in) :: a - class(psb_d_hlg_sparse_mat), pointer :: tmpa + class(psb_d_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine d_hlg_sync + end subroutine d_cuda_hlg_sync - subroutine d_hlg_set_host(a) + subroutine d_cuda_hlg_set_host(a) implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine d_hlg_set_host + end subroutine d_cuda_hlg_set_host - subroutine d_hlg_set_dev(a) + subroutine d_cuda_hlg_set_dev(a) implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine d_hlg_set_dev + end subroutine d_cuda_hlg_set_dev - subroutine d_hlg_set_sync(a) + subroutine d_cuda_hlg_set_sync(a) implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine d_hlg_set_sync + end subroutine d_cuda_hlg_set_sync - function d_hlg_is_dev(a) result(res) + function d_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function d_hlg_is_dev + end function d_cuda_hlg_is_dev - function d_hlg_is_host(a) result(res) + function d_cuda_hlg_is_host(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function d_hlg_is_host + end function d_cuda_hlg_is_host - function d_hlg_is_sync(a) result(res) + function d_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function d_hlg_is_sync + end function d_cuda_hlg_is_sync - subroutine d_hlg_finalize(a) + subroutine d_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_d_hlg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_hlg_finalize + end subroutine d_cuda_hlg_finalize #else interface - subroutine psb_d_hlg_mold(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_mold(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_mold + end subroutine psb_d_cuda_hlg_mold end interface #endif -end module psb_d_hlg_mat_mod +end module psb_d_cuda_hlg_mat_mod diff --git a/cuda/psb_d_hybg_mat_mod.F90 b/cuda/psb_d_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_d_hybg_mat_mod.F90 rename to cuda/psb_d_cuda_hybg_mat_mod.F90 index d764daa7..be4c8392 100644 --- a/cuda/psb_d_hybg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_d_hybg_mat_mod +module psb_d_cuda_hybg_mat_mod use iso_c_binding use psb_d_mat_mod use cusparse_mod - type, extends(psb_d_csr_sparse_mat) :: psb_d_hybg_sparse_mat + type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_d_hybg_mat_mod type(d_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => d_hybg_get_fmt - procedure, pass(a) :: sizeof => d_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_d_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_hybg_csmm - procedure, pass(a) :: csmv => psb_d_hybg_csmv - procedure, pass(a) :: scals => psb_d_hybg_scals - procedure, pass(a) :: scalv => psb_d_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_d_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_hybg_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_d_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_d_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_hybg_from_fmt - procedure, pass(a) :: free => d_hybg_free - procedure, pass(a) :: mold => psb_d_hybg_mold - procedure, pass(a) :: to_gpu => psb_d_hybg_to_gpu - final :: d_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => d_cuda_hybg_free + procedure, pass(a) :: mold => psb_d_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_hybg_to_gpu + final :: d_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_d_hybg_mold + procedure, pass(a) :: mold => psb_d_cuda_hybg_mold #endif - end type psb_d_hybg_sparse_mat + end type psb_d_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: d_hybg_get_nzeros, d_hybg_free, d_hybg_get_fmt, & - & d_hybg_get_size, d_hybg_sizeof, d_hybg_get_nz_row + private :: d_cuda_hybg_get_nzeros, d_cuda_hybg_free, d_cuda_hybg_get_fmt, & + & d_cuda_hybg_get_size, d_cuda_hybg_sizeof, d_cuda_hybg_get_nz_row interface - subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_inner_vect_sv + end subroutine psb_d_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_vect_mv + end subroutine psb_d_cuda_hybg_vect_mv end interface interface - subroutine psb_d_hybg_reallocate_nz(nz,a) - import :: psb_d_hybg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_d_hybg_reallocate_nz + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_hybg_reallocate_nz end interface interface - subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_d_hybg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_hybg_allocate_mnnz + end subroutine psb_d_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_d_hybg_mold(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_mold(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hybg_mold + end subroutine psb_d_cuda_hybg_mold end interface interface - subroutine psb_d_hybg_to_gpu(a,info, nzrm) - import :: psb_d_hybg_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_hybg_to_gpu + end subroutine psb_d_cuda_hybg_to_gpu end interface interface - subroutine psb_d_cp_hybg_from_coo(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hybg_from_coo + end subroutine psb_d_cuda_cp_hybg_from_coo end interface interface - subroutine psb_d_cp_hybg_from_fmt(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hybg_from_fmt + end subroutine psb_d_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_d_mv_hybg_from_coo(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hybg_from_coo + end subroutine psb_d_cuda_mv_hybg_from_coo end interface interface - subroutine psb_d_mv_hybg_from_fmt(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hybg_from_fmt + end subroutine psb_d_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_csmv + end subroutine psb_d_cuda_hybg_csmv end interface interface - subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_csmm + end subroutine psb_d_cuda_hybg_csmm end interface interface - subroutine psb_d_hybg_scal(d,a,info,side) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hybg_scal(d,a,info,side) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_hybg_scal + end subroutine psb_d_cuda_hybg_scal end interface interface - subroutine psb_d_hybg_scals(d,a,info) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hybg_scals(d,a,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hybg_scals + end subroutine psb_d_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function d_hybg_sizeof(a) result(res) + function d_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + psb_sizeof_dp * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_hybg_sizeof + end function d_cuda_hybg_sizeof - function d_hybg_get_fmt() result(res) + function d_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function d_hybg_get_fmt + end function d_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine d_hybg_free(a) + subroutine d_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_d_csr_sparse_mat%free() return - end subroutine d_hybg_free + end subroutine d_cuda_hybg_free - subroutine d_hybg_finalize(a) + subroutine d_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_d_hybg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine d_hybg_finalize + end subroutine d_cuda_hybg_finalize #else interface - subroutine psb_d_hybg_mold(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_mold(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hybg_mold + end subroutine psb_d_cuda_hybg_mold end interface #endif -end module psb_d_hybg_mat_mod +end module psb_d_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_d_gpu_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_d_gpu_vect_mod.F90 rename to cuda/psb_d_cuda_vect_mod.F90 index cd3757c3..83ec108b 100644 --- a/cuda/psb_d_gpu_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_d_gpu_vect_mod +module psb_d_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_d_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_d_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_d_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_base_vect_type) :: psb_d_vect_gpu + type, extends(psb_d_base_vect_type) :: psb_d_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_d_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => d_gpu_get_nrows - procedure, nopass :: get_fmt => d_gpu_get_fmt - - procedure, pass(x) :: all => d_gpu_all - procedure, pass(x) :: zero => d_gpu_zero - procedure, pass(x) :: asb_m => d_gpu_asb_m - procedure, pass(x) :: sync => d_gpu_sync - procedure, pass(x) :: sync_space => d_gpu_sync_space - procedure, pass(x) :: bld_x => d_gpu_bld_x - procedure, pass(x) :: bld_mn => d_gpu_bld_mn - procedure, pass(x) :: free => d_gpu_free - procedure, pass(x) :: ins_a => d_gpu_ins_a - procedure, pass(x) :: ins_v => d_gpu_ins_v - procedure, pass(x) :: is_host => d_gpu_is_host - procedure, pass(x) :: is_dev => d_gpu_is_dev - procedure, pass(x) :: is_sync => d_gpu_is_sync - procedure, pass(x) :: set_host => d_gpu_set_host - procedure, pass(x) :: set_dev => d_gpu_set_dev - procedure, pass(x) :: set_sync => d_gpu_set_sync - procedure, pass(x) :: set_scal => d_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => d_gpu_set_vect - procedure, pass(x) :: gthzv_x => d_gpu_gthzv_x - procedure, pass(y) :: sctb => d_gpu_sctb - procedure, pass(y) :: sctb_x => d_gpu_sctb_x - procedure, pass(x) :: gthzbuf => d_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => d_gpu_sctb_buf - procedure, pass(x) :: new_buffer => d_gpu_new_buffer - procedure, nopass :: device_wait => d_gpu_device_wait - procedure, pass(x) :: free_buffer => d_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => d_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => d_gpu_dot_v - procedure, pass(x) :: dot_a => d_gpu_dot_a - procedure, pass(y) :: axpby_v => d_gpu_axpby_v - procedure, pass(y) :: axpby_a => d_gpu_axpby_a - procedure, pass(y) :: mlt_v => d_gpu_mlt_v - procedure, pass(y) :: mlt_a => d_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => d_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => d_gpu_mlt_v_2 - procedure, pass(x) :: scal => d_gpu_scal - procedure, pass(x) :: nrm2 => d_gpu_nrm2 - procedure, pass(x) :: amax => d_gpu_amax - procedure, pass(x) :: asum => d_gpu_asum - procedure, pass(x) :: absval1 => d_gpu_absval1 - procedure, pass(x) :: absval2 => d_gpu_absval2 - - final :: d_gpu_vect_finalize + procedure, pass(x) :: get_nrows => d_cuda_get_nrows + procedure, nopass :: get_fmt => d_cuda_get_fmt + + procedure, pass(x) :: all => d_cuda_all + procedure, pass(x) :: zero => d_cuda_zero + procedure, pass(x) :: asb_m => d_cuda_asb_m + procedure, pass(x) :: sync => d_cuda_sync + procedure, pass(x) :: sync_space => d_cuda_sync_space + procedure, pass(x) :: bld_x => d_cuda_bld_x + procedure, pass(x) :: bld_mn => d_cuda_bld_mn + procedure, pass(x) :: free => d_cuda_free + procedure, pass(x) :: ins_a => d_cuda_ins_a + procedure, pass(x) :: ins_v => d_cuda_ins_v + procedure, pass(x) :: is_host => d_cuda_is_host + procedure, pass(x) :: is_dev => d_cuda_is_dev + procedure, pass(x) :: is_sync => d_cuda_is_sync + procedure, pass(x) :: set_host => d_cuda_set_host + procedure, pass(x) :: set_dev => d_cuda_set_dev + procedure, pass(x) :: set_sync => d_cuda_set_sync + procedure, pass(x) :: set_scal => d_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => d_cuda_set_vect + procedure, pass(x) :: gthzv_x => d_cuda_gthzv_x + procedure, pass(y) :: sctb => d_cuda_sctb + procedure, pass(y) :: sctb_x => d_cuda_sctb_x + procedure, pass(x) :: gthzbuf => d_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => d_cuda_sctb_buf + procedure, pass(x) :: new_buffer => d_cuda_new_buffer + procedure, nopass :: device_wait => d_cuda_device_wait + procedure, pass(x) :: free_buffer => d_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => d_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => d_cuda_dot_v + procedure, pass(x) :: dot_a => d_cuda_dot_a + procedure, pass(y) :: axpby_v => d_cuda_axpby_v + procedure, pass(y) :: axpby_a => d_cuda_axpby_a + procedure, pass(y) :: mlt_v => d_cuda_mlt_v + procedure, pass(y) :: mlt_a => d_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => d_cuda_mlt_v_2 + procedure, pass(x) :: scal => d_cuda_scal + procedure, pass(x) :: nrm2 => d_cuda_nrm2 + procedure, pass(x) :: amax => d_cuda_amax + procedure, pass(x) :: asum => d_cuda_asum + procedure, pass(x) :: absval1 => d_cuda_absval1 + procedure, pass(x) :: absval2 => d_cuda_absval2 + + final :: d_cuda_vect_finalize #endif - end type psb_d_vect_gpu + end type psb_d_vect_cuda - public :: psb_d_vect_gpu_ + public :: psb_d_vect_cuda_ private :: constructor - interface psb_d_vect_gpu_ + interface psb_d_vect_cuda_ module procedure constructor - end interface psb_d_vect_gpu_ + end interface psb_d_vect_cuda_ contains function constructor(x) result(this) real(psb_dpk_) :: x(:) - type(psb_d_vect_gpu) :: this + type(psb_d_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine d_gpu_device_wait() + subroutine d_cuda_device_wait() call psb_cudaSync() - end subroutine d_gpu_device_wait + end subroutine d_cuda_device_wait - subroutine d_gpu_new_buffer(n,x,info) + subroutine d_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine d_gpu_gthzv_x + end subroutine d_cuda_gthzv_x - subroutine d_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine d_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_d_vect_gpu) :: x + class(psb_d_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine d_gpu_gthzbuf + end subroutine d_cuda_gthzbuf - subroutine d_gpu_sctb(n,idx,x,beta,y) + subroutine d_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_gpu) :: y + class(psb_d_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_d_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine d_gpu_sctb + end subroutine d_cuda_sctb - subroutine d_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine d_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_gpu) :: y + class(psb_d_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine d_gpu_sctb_x + end subroutine d_cuda_sctb_x - subroutine d_gpu_sctb_buf(i,n,idx,beta,y) + subroutine d_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta - class(psb_d_vect_gpu) :: y + class(psb_d_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine d_gpu_sctb_buf + end subroutine d_cuda_sctb_buf - subroutine d_gpu_bld_x(x,this) + subroutine d_cuda_bld_x(x,this) use psb_base_mod real(psb_dpk_), intent(in) :: this(:) - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_bld_x',& + call psb_errpush(info,'d_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine d_gpu_bld_x + end subroutine d_cuda_bld_x - subroutine d_gpu_bld_mn(x,n) + subroutine d_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'d_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'d_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine d_gpu_bld_mn + end subroutine d_cuda_bld_mn - subroutine d_gpu_set_host(x) + subroutine d_cuda_set_host(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine d_gpu_set_host + end subroutine d_cuda_set_host - subroutine d_gpu_set_dev(x) + subroutine d_cuda_set_dev(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine d_gpu_set_dev + end subroutine d_cuda_set_dev - subroutine d_gpu_set_sync(x) + subroutine d_cuda_set_sync(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine d_gpu_set_sync + end subroutine d_cuda_set_sync - function d_gpu_is_dev(x) result(res) + function d_cuda_is_dev(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function d_gpu_is_dev + end function d_cuda_is_dev - function d_gpu_is_host(x) result(res) + function d_cuda_is_host(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function d_gpu_is_host + end function d_cuda_is_host - function d_gpu_is_sync(x) result(res) + function d_cuda_is_sync(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function d_gpu_is_sync + end function d_cuda_is_sync - function d_gpu_get_nrows(x) result(res) + function d_cuda_get_nrows(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function d_gpu_get_nrows + end function d_cuda_get_nrows - function d_gpu_get_fmt() result(res) + function d_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'dGPU' - end function d_gpu_get_fmt + end function d_cuda_get_fmt - subroutine d_gpu_all(n, x, info) + subroutine d_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_d_vect_gpu), intent(out) :: x + class(psb_d_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_all',& + call psb_errpush(info,'d_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine d_gpu_all + end subroutine d_cuda_all - subroutine d_gpu_zero(x) + subroutine d_cuda_zero(x) use psi_serial_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine d_gpu_zero + end subroutine d_cuda_zero - subroutine d_gpu_asb_m(n, x, info) + subroutine d_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine d_gpu_asb_m + end subroutine d_cuda_asb_m - subroutine d_gpu_sync_space(x,info) + subroutine d_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine d_gpu_sync_space + end subroutine d_cuda_sync_space - subroutine d_gpu_sync(x) + subroutine d_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'d_gpu_sync') + call psb_errpush(info,'d_cuda_sync') end if - end subroutine d_gpu_sync + end subroutine d_cuda_sync - subroutine d_gpu_free(x, info) + subroutine d_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine d_gpu_free + end subroutine d_cuda_free - subroutine d_gpu_set_scal(x,val,first,last) - class(psb_d_vect_gpu), intent(inout) :: x + subroutine d_cuda_set_scal(x,val,first,last) + class(psb_d_vect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine d_gpu_set_scal + end subroutine d_cuda_set_scal !!$ -!!$ subroutine d_gpu_set_vect(x,val) -!!$ class(psb_d_vect_gpu), intent(inout) :: x +!!$ subroutine d_cuda_set_vect(x,val) +!!$ class(psb_d_vect_cuda), intent(inout) :: x !!$ real(psb_dpk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_d_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine d_gpu_set_vect +!!$ end subroutine d_cuda_set_vect - function d_gpu_dot_v(n,x,y) result(res) + function d_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_d_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'d_gpu_dot_v') + call psb_errpush(info,'d_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function d_gpu_dot_v + end function d_cuda_dot_v - function d_gpu_dot_a(n,x,y) result(res) + function d_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function d_gpu_dot_a + end function d_cuda_dot_a - subroutine d_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine d_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y real(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) ! Do something different here if ((beta /= dzero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine d_gpu_axpby_v + end subroutine d_cuda_axpby_v - subroutine d_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine d_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y real(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine d_gpu_axpby_a + end subroutine d_cuda_axpby_a - subroutine d_gpu_mlt_v(x, y, info) + subroutine d_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine d_gpu_mlt_v + end subroutine d_cuda_mlt_v - subroutine d_gpu_mlt_a(x, y, info) + subroutine d_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_d_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine d_gpu_mlt_a + end subroutine d_cuda_mlt_a - subroutine d_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine d_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none real(psb_dpk_), intent(in) :: alpha,beta real(psb_dpk_), intent(in) :: x(:) real(psb_dpk_), intent(in) :: y(:) - class(psb_d_vect_gpu), intent(inout) :: z + class(psb_d_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine d_gpu_mlt_a_2 + end subroutine d_cuda_mlt_a_2 - subroutine d_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine d_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none real(psb_dpk_), intent(in) :: alpha,beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_gpu), intent(inout) :: z + class(psb_d_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type (yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= dzero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine d_gpu_mlt_v_2 + end subroutine d_cuda_mlt_v_2 - subroutine d_gpu_scal(alpha, x) + subroutine d_cuda_scal(alpha, x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x real(psb_dpk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine d_gpu_scal + end subroutine d_cuda_scal - function d_gpu_nrm2(n,x) result(res) + function d_cuda_nrm2(n,x) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDevice(res,n,x%deviceVect) - end function d_gpu_nrm2 + end function d_cuda_nrm2 - function d_gpu_amax(n,x) result(res) + function d_cuda_amax(n,x) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDevice(res,n,x%deviceVect) - end function d_gpu_amax + end function d_cuda_amax - function d_gpu_asum(n,x) result(res) + function d_cuda_asum(n,x) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDevice(res,n,x%deviceVect) - end function d_gpu_asum + end function d_cuda_asum - subroutine d_gpu_absval1(x) + subroutine d_cuda_absval1(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,done,x%deviceVect) - end subroutine d_gpu_absval1 + end subroutine d_cuda_absval1 - subroutine d_gpu_absval2(x,y) + subroutine d_cuda_absval2(x,y) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_d_vect_gpu) + class is (psb_d_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,done,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_d_base_vect_type%absval(y) end select - end subroutine d_gpu_absval2 + end subroutine d_cuda_absval2 - subroutine d_gpu_vect_finalize(x) + subroutine d_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_d_vect_gpu), intent(inout) :: x + type(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine d_gpu_vect_finalize + end subroutine d_cuda_vect_finalize - subroutine d_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine d_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_d_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_d_vect_gpu) + class is (psb_d_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceDouble(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine d_gpu_ins_v + end subroutine d_cuda_ins_v - subroutine d_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine d_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_dpk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_d_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine d_gpu_ins_a + end subroutine d_cuda_ins_a #endif -end module psb_d_gpu_vect_mod +end module psb_d_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_d_gpu_vect_mod -module psb_d_gpu_multivect_mod +module psb_d_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_d_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_d_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_d_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_base_multivect_type) :: psb_d_multivect_gpu + type, extends(psb_d_base_multivect_type) :: psb_d_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_d_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => d_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => d_gpu_multi_get_ncols - procedure, nopass :: get_fmt => d_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => d_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => d_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => d_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => d_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => d_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => d_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => d_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => d_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => d_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => d_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => d_gpu_multi_amax -!!$ procedure, pass(x) :: asum => d_gpu_multi_asum - procedure, pass(x) :: all => d_gpu_multi_all - procedure, pass(x) :: zero => d_gpu_multi_zero - procedure, pass(x) :: asb => d_gpu_multi_asb - procedure, pass(x) :: sync => d_gpu_multi_sync - procedure, pass(x) :: sync_space => d_gpu_multi_sync_space - procedure, pass(x) :: bld_x => d_gpu_multi_bld_x - procedure, pass(x) :: bld_n => d_gpu_multi_bld_n - procedure, pass(x) :: free => d_gpu_multi_free - procedure, pass(x) :: ins => d_gpu_multi_ins - procedure, pass(x) :: is_host => d_gpu_multi_is_host - procedure, pass(x) :: is_dev => d_gpu_multi_is_dev - procedure, pass(x) :: is_sync => d_gpu_multi_is_sync - procedure, pass(x) :: set_host => d_gpu_multi_set_host - procedure, pass(x) :: set_dev => d_gpu_multi_set_dev - procedure, pass(x) :: set_sync => d_gpu_multi_set_sync - procedure, pass(x) :: set_scal => d_gpu_multi_set_scal - procedure, pass(x) :: set_vect => d_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => d_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => d_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => d_gpu_multi_sctb_x - final :: d_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => d_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => d_cuda_multi_get_ncols + procedure, nopass :: get_fmt => d_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => d_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => d_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => d_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => d_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => d_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => d_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => d_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => d_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => d_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => d_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => d_cuda_multi_amax +!!$ procedure, pass(x) :: asum => d_cuda_multi_asum + procedure, pass(x) :: all => d_cuda_multi_all + procedure, pass(x) :: zero => d_cuda_multi_zero + procedure, pass(x) :: asb => d_cuda_multi_asb + procedure, pass(x) :: sync => d_cuda_multi_sync + procedure, pass(x) :: sync_space => d_cuda_multi_sync_space + procedure, pass(x) :: bld_x => d_cuda_multi_bld_x + procedure, pass(x) :: bld_n => d_cuda_multi_bld_n + procedure, pass(x) :: free => d_cuda_multi_free + procedure, pass(x) :: ins => d_cuda_multi_ins + procedure, pass(x) :: is_host => d_cuda_multi_is_host + procedure, pass(x) :: is_dev => d_cuda_multi_is_dev + procedure, pass(x) :: is_sync => d_cuda_multi_is_sync + procedure, pass(x) :: set_host => d_cuda_multi_set_host + procedure, pass(x) :: set_dev => d_cuda_multi_set_dev + procedure, pass(x) :: set_sync => d_cuda_multi_set_sync + procedure, pass(x) :: set_scal => d_cuda_multi_set_scal + procedure, pass(x) :: set_vect => d_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => d_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => d_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => d_cuda_multi_sctb_x + final :: d_cuda_multi_vect_finalize #endif - end type psb_d_multivect_gpu + end type psb_d_multivect_cuda - public :: psb_d_multivect_gpu + public :: psb_d_multivect_cuda private :: constructor - interface psb_d_multivect_gpu + interface psb_d_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) real(psb_dpk_) :: x(:,:) - type(psb_d_multivect_gpu) :: this + type(psb_d_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine d_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_dpk_) :: y(:) -!!$ class(psb_d_multivect_gpu) :: x +!!$ class(psb_d_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine d_gpu_multi_gthzv_x +!!$ end subroutine d_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine d_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine d_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_multivect_gpu) :: y +!!$ class(psb_d_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_d_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine d_gpu_multi_sctb +!!$ end subroutine d_cuda_multi_sctb !!$ -!!$ subroutine d_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine d_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_multivect_gpu) :: y +!!$ class(psb_d_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine d_gpu_multi_sctb_x +!!$ end subroutine d_cuda_multi_sctb_x - subroutine d_gpu_multi_bld_x(x,this) + subroutine d_cuda_multi_bld_x(x,this) use psb_base_mod real(psb_dpk_), intent(in) :: this(:,:) - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_multi_bld_x',& + call psb_errpush(info,'d_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine d_gpu_multi_bld_x + end subroutine d_cuda_multi_bld_x - subroutine d_gpu_multi_bld_n(x,m,n) + subroutine d_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'d_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'d_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine d_gpu_multi_bld_n + end subroutine d_cuda_multi_bld_n - subroutine d_gpu_multi_set_host(x) + subroutine d_cuda_multi_set_host(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine d_gpu_multi_set_host + end subroutine d_cuda_multi_set_host - subroutine d_gpu_multi_set_dev(x) + subroutine d_cuda_multi_set_dev(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine d_gpu_multi_set_dev + end subroutine d_cuda_multi_set_dev - subroutine d_gpu_multi_set_sync(x) + subroutine d_cuda_multi_set_sync(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine d_gpu_multi_set_sync + end subroutine d_cuda_multi_set_sync - function d_gpu_multi_is_dev(x) result(res) + function d_cuda_multi_is_dev(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function d_gpu_multi_is_dev + end function d_cuda_multi_is_dev - function d_gpu_multi_is_host(x) result(res) + function d_cuda_multi_is_host(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function d_gpu_multi_is_host + end function d_cuda_multi_is_host - function d_gpu_multi_is_sync(x) result(res) + function d_cuda_multi_is_sync(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function d_gpu_multi_is_sync + end function d_cuda_multi_is_sync - function d_gpu_multi_get_nrows(x) result(res) + function d_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function d_gpu_multi_get_nrows + end function d_cuda_multi_get_nrows - function d_gpu_multi_get_ncols(x) result(res) + function d_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function d_gpu_multi_get_ncols + end function d_cuda_multi_get_ncols - function d_gpu_multi_get_fmt() result(res) + function d_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'dGPU' - end function d_gpu_multi_get_fmt + end function d_cuda_multi_get_fmt -!!$ function d_gpu_multi_dot_v(n,x,y) result(res) +!!$ function d_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ class(psb_d_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_d_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'d_gpu_multi_dot_v') +!!$ call psb_errpush(info,'d_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function d_gpu_multi_dot_v +!!$ end function d_cuda_multi_dot_v !!$ -!!$ function d_gpu_multi_dot_a(n,x,y) result(res) +!!$ function d_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ real(psb_dpk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function d_gpu_multi_dot_a +!!$ end function d_cuda_multi_dot_a !!$ -!!$ subroutine d_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine d_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_d_base_multivect_type), intent(inout) :: x -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ real(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine d_gpu_multi_axpby_v +!!$ end subroutine d_cuda_multi_axpby_v !!$ -!!$ subroutine d_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine d_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ real(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine d_gpu_multi_axpby_a +!!$ end subroutine d_cuda_multi_axpby_a !!$ -!!$ subroutine d_gpu_multi_mlt_v(x, y, info) +!!$ subroutine d_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_d_base_multivect_type), intent(inout) :: x -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine d_gpu_multi_mlt_v +!!$ end subroutine d_cuda_multi_mlt_v !!$ -!!$ subroutine d_gpu_multi_mlt_a(x, y, info) +!!$ subroutine d_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_d_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine d_gpu_multi_mlt_a +!!$ end subroutine d_cuda_multi_mlt_a !!$ -!!$ subroutine d_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine d_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta !!$ real(psb_dpk_), intent(in) :: x(:) !!$ real(psb_dpk_), intent(in) :: y(:) -!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ class(psb_d_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine d_gpu_multi_mlt_a_2 +!!$ end subroutine d_cuda_multi_mlt_a_2 !!$ -!!$ subroutine d_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine d_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta !!$ class(psb_d_base_multivect_type), intent(inout) :: x !!$ class(psb_d_base_multivect_type), intent(inout) :: y -!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ class(psb_d_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine d_gpu_multi_mlt_v_2 +!!$ end subroutine d_cuda_multi_mlt_v_2 - subroutine d_gpu_multi_set_scal(x,val) - class(psb_d_multivect_gpu), intent(inout) :: x + subroutine d_cuda_multi_set_scal(x,val) + class(psb_d_multivect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_d_base_multivect_type%set_scal(val) call x%set_host() - end subroutine d_gpu_multi_set_scal + end subroutine d_cuda_multi_set_scal - subroutine d_gpu_multi_set_vect(x,val) - class(psb_d_multivect_gpu), intent(inout) :: x + subroutine d_cuda_multi_set_vect(x,val) + class(psb_d_multivect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_d_base_multivect_type%set_vect(val) call x%set_host() - end subroutine d_gpu_multi_set_vect + end subroutine d_cuda_multi_set_vect -!!$ subroutine d_gpu_multi_scal(alpha, x) +!!$ subroutine d_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ real(psb_dpk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_d_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine d_gpu_multi_scal +!!$ end subroutine d_cuda_multi_scal !!$ !!$ -!!$ function d_gpu_multi_nrm2(n,x) result(res) +!!$ function d_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function d_gpu_multi_nrm2 +!!$ end function d_cuda_multi_nrm2 !!$ -!!$ function d_gpu_multi_amax(n,x) result(res) +!!$ function d_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function d_gpu_multi_amax +!!$ end function d_cuda_multi_amax !!$ -!!$ function d_gpu_multi_asum(n,x) result(res) +!!$ function d_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function d_gpu_multi_asum +!!$ end function d_cuda_multi_asum - subroutine d_gpu_multi_all(m,n, x, info) + subroutine d_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_multivect_gpu), intent(out) :: x + class(psb_d_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=dzero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_multi_all',& + call psb_errpush(info,'d_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine d_gpu_multi_all + end subroutine d_cuda_multi_all - subroutine d_gpu_multi_zero(x) + subroutine d_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine d_gpu_multi_zero + end subroutine d_cuda_multi_zero - subroutine d_gpu_multi_asb(m,n, x, info) + subroutine d_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine d_gpu_multi_asb + end subroutine d_cuda_multi_asb - subroutine d_gpu_multi_sync_space(x,info) + subroutine d_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine d_gpu_multi_sync_space + end subroutine d_cuda_multi_sync_space - subroutine d_gpu_multi_sync(x) + subroutine d_cuda_multi_sync(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'d_gpu_multi_sync') + call psb_errpush(info,'d_cuda_multi_sync') end if - end subroutine d_gpu_multi_sync + end subroutine d_cuda_multi_sync - subroutine d_gpu_multi_free(x, info) + subroutine d_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine d_gpu_multi_free + end subroutine d_cuda_multi_free - subroutine d_gpu_multi_vect_finalize(x) + subroutine d_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_d_multivect_gpu), intent(inout) :: x + type(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine d_gpu_multi_vect_finalize + end subroutine d_cuda_multi_vect_finalize - subroutine d_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine d_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_dpk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_d_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine d_gpu_multi_ins + end subroutine d_cuda_multi_ins #endif -end module psb_d_gpu_multivect_mod +end module psb_d_cuda_multivect_mod diff --git a/cuda/psb_i_csrg_mat_mod.F90 b/cuda/psb_i_csrg_mat_mod.F90 deleted file mode 100644 index 9a4a3852..00000000 --- a/cuda/psb_i_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_i_csrg_mat_mod - - use iso_c_binding - use psb_i_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_i_csr_sparse_mat) :: psb_i_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(i_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => i_csrg_get_fmt - procedure, pass(a) :: sizeof => i_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_i_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_i_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_i_csrg_csmm - procedure, pass(a) :: csmv => psb_i_csrg_csmv - procedure, pass(a) :: scals => psb_i_csrg_scals - procedure, pass(a) :: scalv => psb_i_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_i_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_csrg_from_fmt - procedure, pass(a) :: free => i_csrg_free - procedure, pass(a) :: mold => psb_i_csrg_mold - procedure, pass(a) :: is_host => i_csrg_is_host - procedure, pass(a) :: is_dev => i_csrg_is_dev - procedure, pass(a) :: is_sync => i_csrg_is_sync - procedure, pass(a) :: set_host => i_csrg_set_host - procedure, pass(a) :: set_dev => i_csrg_set_dev - procedure, pass(a) :: set_sync => i_csrg_set_sync - procedure, pass(a) :: sync => i_csrg_sync - procedure, pass(a) :: to_gpu => psb_i_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_i_csrg_from_gpu - final :: i_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_csrg_mold -#endif - end type psb_i_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: i_csrg_get_nzeros, i_csrg_free, i_csrg_get_fmt, & - & i_csrg_get_size, i_csrg_sizeof, i_csrg_get_nz_row - - - interface - subroutine psb_i_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta - class(psb_i_base_vect_type), intent(inout) :: x - class(psb_i_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_i_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta - class(psb_i_base_vect_type), intent(inout) :: x - class(psb_i_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_vect_mv - end interface - - interface - subroutine psb_i_csrg_reallocate_nz(nz,a) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_i_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_i_csrg_reallocate_nz - end interface - - interface - subroutine psb_i_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_csrg_allocate_mnnz - end interface - - interface - subroutine psb_i_csrg_mold(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_mold - end interface - - interface - subroutine psb_i_csrg_to_gpu(a,info, nzrm) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_csrg_to_gpu - end interface - - interface - subroutine psb_i_csrg_from_gpu(a,info) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_from_gpu - end interface - - interface - subroutine psb_i_cp_csrg_from_coo(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_csrg_from_coo - end interface - - interface - subroutine psb_i_cp_csrg_from_fmt(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_csrg_from_fmt - end interface - - interface - subroutine psb_i_mv_csrg_from_coo(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_csrg_from_coo - end interface - - interface - subroutine psb_i_mv_csrg_from_fmt(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_csrg_from_fmt - end interface - - interface - subroutine psb_i_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta, x(:) - integer(psb_ipk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_csmv - end interface - interface - subroutine psb_i_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) - integer(psb_ipk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_csmm - end interface - - interface - subroutine psb_i_csrg_scal(d,a,info,side) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_i_csrg_scal - end interface - - interface - subroutine psb_i_csrg_scals(d,a,info) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function i_csrg_sizeof(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + psb_sizeof_ip * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function i_csrg_sizeof - - function i_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function i_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine i_csrg_set_host(a) - implicit none - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine i_csrg_set_host - - subroutine i_csrg_set_dev(a) - implicit none - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine i_csrg_set_dev - - subroutine i_csrg_set_sync(a) - implicit none - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine i_csrg_set_sync - - function i_csrg_is_dev(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function i_csrg_is_dev - - function i_csrg_is_host(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function i_csrg_is_host - - function i_csrg_is_sync(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function i_csrg_is_sync - - - subroutine i_csrg_sync(a) - implicit none - class(psb_i_csrg_sparse_mat), target, intent(in) :: a - class(psb_i_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine i_csrg_sync - - subroutine i_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_i_csr_sparse_mat%free() - - return - - end subroutine i_csrg_free - - subroutine i_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_i_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine i_csrg_finalize - -#else - interface - subroutine psb_i_csrg_mold(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_mold - end interface - -#endif - -end module psb_i_csrg_mat_mod diff --git a/cuda/psb_i_cuda_csrg_mat_mod.F90 b/cuda/psb_i_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..de0eac09 --- /dev/null +++ b/cuda/psb_i_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_cuda_csrg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_csr_sparse_mat) :: psb_i_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(i_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_i_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_i_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => i_cuda_csrg_free + procedure, pass(a) :: mold => psb_i_cuda_csrg_mold + procedure, pass(a) :: is_host => i_cuda_csrg_is_host + procedure, pass(a) :: is_dev => i_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => i_cuda_csrg_is_sync + procedure, pass(a) :: set_host => i_cuda_csrg_set_host + procedure, pass(a) :: set_dev => i_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => i_cuda_csrg_set_sync + procedure, pass(a) :: sync => i_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_i_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_i_cuda_csrg_from_gpu + final :: i_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_cuda_csrg_mold +#endif + end type psb_i_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_cuda_csrg_get_nzeros, i_cuda_csrg_free, i_cuda_csrg_get_fmt, & + & i_cuda_csrg_get_size, i_cuda_csrg_sizeof, i_cuda_csrg_get_nz_row + + + interface + subroutine psb_i_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_i_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_i_cuda_csrg_reallocate_nz(nz,a) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_i_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_i_cuda_csrg_mold(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_mold + end interface + + interface + subroutine psb_i_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_i_cuda_csrg_from_gpu(a,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_csmv + end interface + interface + subroutine psb_i_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_csmm + end interface + + interface + subroutine psb_i_cuda_csrg_scal(d,a,info,side) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_cuda_csrg_scal + end interface + + interface + subroutine psb_i_cuda_csrg_scals(d,a,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_ip * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_cuda_csrg_sizeof + + function i_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function i_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine i_cuda_csrg_set_host(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_cuda_csrg_set_host + + subroutine i_cuda_csrg_set_dev(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_cuda_csrg_set_dev + + subroutine i_cuda_csrg_set_sync(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_cuda_csrg_set_sync + + function i_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_cuda_csrg_is_dev + + function i_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_cuda_csrg_is_host + + function i_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_cuda_csrg_is_sync + + + subroutine i_cuda_csrg_sync(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_cuda_csrg_sync + + subroutine i_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_i_csr_sparse_mat%free() + + return + + end subroutine i_cuda_csrg_free + + subroutine i_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine i_cuda_csrg_finalize + +#else + interface + subroutine psb_i_cuda_csrg_mold(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_mold + end interface + +#endif + +end module psb_i_cuda_csrg_mat_mod diff --git a/cuda/psb_i_diag_mat_mod.F90 b/cuda/psb_i_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_i_diag_mat_mod.F90 rename to cuda/psb_i_cuda_diag_mat_mod.F90 index b54ee8d5..94a3cc3e 100644 --- a/cuda/psb_i_diag_mat_mod.F90 +++ b/cuda/psb_i_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_i_diag_mat_mod +module psb_i_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_i_dia_mat_mod - type, extends(psb_i_dia_sparse_mat) :: psb_i_diag_sparse_mat + type, extends(psb_i_dia_sparse_mat) :: psb_i_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_i_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => i_diag_get_fmt - procedure, pass(a) :: sizeof => i_diag_sizeof - procedure, pass(a) :: vect_mv => psb_i_diag_vect_mv -! procedure, pass(a) :: csmm => psb_i_diag_csmm - procedure, pass(a) :: csmv => psb_i_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_i_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_i_diag_scals -! procedure, pass(a) :: scalv => psb_i_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_i_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_i_diag_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => i_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_i_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_i_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_i_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_i_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_i_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_i_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_i_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_i_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_i_mv_diag_from_fmt - procedure, pass(a) :: free => i_diag_free - procedure, pass(a) :: mold => psb_i_diag_mold - procedure, pass(a) :: to_gpu => psb_i_diag_to_gpu - final :: i_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => i_cuda_diag_free + procedure, pass(a) :: mold => psb_i_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_diag_to_gpu + final :: i_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_i_diag_mold + procedure, pass(a) :: mold => psb_i_cuda_diag_mold #endif - end type psb_i_diag_sparse_mat + end type psb_i_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: i_diag_get_nzeros, i_diag_free, i_diag_get_fmt, & - & i_diag_get_size, i_diag_sizeof, i_diag_get_nz_row + private :: i_cuda_diag_get_nzeros, i_cuda_diag_free, i_cuda_diag_get_fmt, & + & i_cuda_diag_get_size, i_cuda_diag_sizeof, i_cuda_diag_get_nz_row interface - subroutine psb_i_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_vect_mv + end subroutine psb_i_cuda_diag_vect_mv end interface interface - subroutine psb_i_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_inner_vect_sv + end subroutine psb_i_cuda_diag_inner_vect_sv end interface interface - subroutine psb_i_diag_reallocate_nz(nz,a) - import :: psb_i_diag_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_diag_reallocate_nz(nz,a) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_diag_sparse_mat), intent(inout) :: a - end subroutine psb_i_diag_reallocate_nz + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_diag_reallocate_nz end interface interface - subroutine psb_i_diag_allocate_mnnz(m,n,a,nz) - import :: psb_i_diag_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_diag_allocate_mnnz + end subroutine psb_i_cuda_diag_allocate_mnnz end interface interface - subroutine psb_i_diag_mold(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_mold(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_diag_mold + end subroutine psb_i_cuda_diag_mold end interface interface - subroutine psb_i_diag_to_gpu(a,info, nzrm) - import :: psb_i_diag_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_diag_to_gpu + end subroutine psb_i_cuda_diag_to_gpu end interface interface - subroutine psb_i_cp_diag_from_coo(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_diag_from_coo(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_diag_from_coo + end subroutine psb_i_cuda_cp_diag_from_coo end interface interface - subroutine psb_i_cp_diag_from_fmt(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_diag_from_fmt + end subroutine psb_i_cuda_cp_diag_from_fmt end interface interface - subroutine psb_i_mv_diag_from_coo(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_diag_from_coo(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_diag_from_coo + end subroutine psb_i_cuda_mv_diag_from_coo end interface interface - subroutine psb_i_mv_diag_from_fmt(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_diag_from_fmt + end subroutine psb_i_cuda_mv_diag_from_fmt end interface interface - subroutine psb_i_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_csmv + end subroutine psb_i_cuda_diag_csmv end interface interface - subroutine psb_i_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_csmm + end subroutine psb_i_cuda_diag_csmm end interface interface - subroutine psb_i_diag_scal(d,a,info, side) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_diag_scal(d,a,info, side) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_diag_scal + end subroutine psb_i_cuda_diag_scal end interface interface - subroutine psb_i_diag_scals(d,a,info) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_diag_scals(d,a,info) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_diag_scals + end subroutine psb_i_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function i_diag_sizeof(a) result(res) + function i_cuda_diag_sizeof(a) result(res) implicit none - class(psb_i_diag_sparse_mat), intent(in) :: a + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_diag_sizeof + end function i_cuda_diag_sizeof - function i_diag_get_fmt() result(res) + function i_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function i_diag_get_fmt + end function i_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine i_diag_free(a) + subroutine i_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine i_diag_free + end subroutine i_cuda_diag_free - subroutine i_diag_finalize(a) + subroutine i_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_i_diag_sparse_mat), intent(inout) :: a + type(psb_i_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_diag_finalize + end subroutine i_cuda_diag_finalize #else interface - subroutine psb_i_diag_mold(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_mold(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_diag_mold + end subroutine psb_i_cuda_diag_mold end interface #endif -end module psb_i_diag_mat_mod +end module psb_i_cuda_diag_mat_mod diff --git a/cuda/psb_i_dnsg_mat_mod.F90 b/cuda/psb_i_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_i_dnsg_mat_mod.F90 rename to cuda/psb_i_cuda_dnsg_mat_mod.F90 index 978996ae..f357977e 100644 --- a/cuda/psb_i_dnsg_mat_mod.F90 +++ b/cuda/psb_i_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_i_dnsg_mat_mod +module psb_i_cuda_dnsg_mat_mod use iso_c_binding use psb_i_mat_mod use psb_i_dns_mat_mod use dnsdev_mod - type, extends(psb_i_dns_sparse_mat) :: psb_i_dnsg_sparse_mat + type, extends(psb_i_dns_sparse_mat) :: psb_i_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_i_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => i_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => i_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_i_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_i_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_i_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_i_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_i_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_i_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_i_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_i_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => i_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_i_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_i_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_i_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_i_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_i_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_i_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_i_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_i_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_dnsg_from_fmt - procedure, pass(a) :: free => i_dnsg_free - procedure, pass(a) :: mold => psb_i_dnsg_mold - procedure, pass(a) :: to_gpu => psb_i_dnsg_to_gpu - final :: i_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => i_cuda_dnsg_free + procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_dnsg_to_gpu + final :: i_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_i_dnsg_mold + procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold #endif - end type psb_i_dnsg_sparse_mat + end type psb_i_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: i_dnsg_get_nzeros, i_dnsg_free, i_dnsg_get_fmt, & - & i_dnsg_get_size, i_dnsg_get_nz_row + private :: i_cuda_dnsg_get_nzeros, i_cuda_dnsg_free, i_cuda_dnsg_get_fmt, & + & i_cuda_dnsg_get_size, i_cuda_dnsg_get_nz_row interface - subroutine psb_i_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_dnsg_vect_mv + end subroutine psb_i_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type -!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta !!$ class(psb_i_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_dnsg_inner_vect_sv +!!$ end subroutine psb_i_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_i_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_i_dnsg_reallocate_nz +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_i_dnsg_allocate_mnnz +!!$ end subroutine psb_i_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_i_dnsg_mold(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_dnsg_mold(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_dnsg_mold + end subroutine psb_i_cuda_dnsg_mold end interface interface - subroutine psb_i_dnsg_to_gpu(a,info) - import :: psb_i_dnsg_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_dnsg_to_gpu(a,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_dnsg_to_gpu + end subroutine psb_i_cuda_dnsg_to_gpu end interface interface - subroutine psb_i_cp_dnsg_from_coo(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_dnsg_from_coo + end subroutine psb_i_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_i_cp_dnsg_from_fmt(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_dnsg_from_fmt + end subroutine psb_i_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_i_mv_dnsg_from_coo(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_dnsg_from_coo + end subroutine psb_i_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_i_mv_dnsg_from_fmt(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_dnsg_from_fmt + end subroutine psb_i_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_i_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:) !!$ integer(psb_ipk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_dnsg_csmv +!!$ end subroutine psb_i_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_i_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) !!$ integer(psb_ipk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_dnsg_csmm +!!$ end subroutine psb_i_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_scal(d,a,info, side) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_i_dnsg_scal +!!$ end subroutine psb_i_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_scals(d,a,info) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_dnsg_scals +!!$ end subroutine psb_i_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function i_dnsg_get_fmt() result(res) + function i_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function i_dnsg_get_fmt + end function i_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine i_dnsg_free(a) + subroutine i_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine i_dnsg_free + end subroutine i_cuda_dnsg_free - subroutine i_dnsg_finalize(a) + subroutine i_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_i_dnsg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_dnsg_finalize + end subroutine i_cuda_dnsg_finalize #else interface - subroutine psb_i_dnsg_mold(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_dnsg_mold(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_dnsg_mold + end subroutine psb_i_cuda_dnsg_mold end interface #endif -end module psb_i_dnsg_mat_mod +end module psb_i_cuda_dnsg_mat_mod diff --git a/cuda/psb_i_elg_mat_mod.F90 b/cuda/psb_i_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_i_elg_mat_mod.F90 rename to cuda/psb_i_cuda_elg_mat_mod.F90 index a421e611..aa3e2d4d 100644 --- a/cuda/psb_i_elg_mat_mod.F90 +++ b/cuda/psb_i_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_i_elg_mat_mod +module psb_i_cuda_elg_mat_mod use iso_c_binding use psb_i_mat_mod use psb_i_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_ell_sparse_mat) :: psb_i_elg_sparse_mat + type, extends(psb_i_ell_sparse_mat) :: psb_i_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_i_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => i_elg_get_fmt - procedure, pass(a) :: sizeof => i_elg_sizeof - procedure, pass(a) :: vect_mv => psb_i_elg_vect_mv - procedure, pass(a) :: csmm => psb_i_elg_csmm - procedure, pass(a) :: csmv => psb_i_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_i_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_i_elg_scals - procedure, pass(a) :: scalv => psb_i_elg_scal - procedure, pass(a) :: reallocate_nz => psb_i_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_elg_allocate_mnnz - procedure, pass(a) :: reinit => i_elg_reinit + procedure, nopass :: get_fmt => i_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_i_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_cuda_elg_scals + procedure, pass(a) :: scalv => psb_i_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => i_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_elg_from_fmt - procedure, pass(a) :: free => i_elg_free - procedure, pass(a) :: mold => psb_i_elg_mold - procedure, pass(a) :: csput_a => psb_i_elg_csput_a - procedure, pass(a) :: csput_v => psb_i_elg_csput_v - procedure, pass(a) :: is_host => i_elg_is_host - procedure, pass(a) :: is_dev => i_elg_is_dev - procedure, pass(a) :: is_sync => i_elg_is_sync - procedure, pass(a) :: set_host => i_elg_set_host - procedure, pass(a) :: set_dev => i_elg_set_dev - procedure, pass(a) :: set_sync => i_elg_set_sync - procedure, pass(a) :: sync => i_elg_sync - procedure, pass(a) :: from_gpu => psb_i_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_i_elg_to_gpu - procedure, pass(a) :: asb => psb_i_elg_asb - final :: i_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => i_cuda_elg_free + procedure, pass(a) :: mold => psb_i_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_i_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_i_cuda_elg_csput_v + procedure, pass(a) :: is_host => i_cuda_elg_is_host + procedure, pass(a) :: is_dev => i_cuda_elg_is_dev + procedure, pass(a) :: is_sync => i_cuda_elg_is_sync + procedure, pass(a) :: set_host => i_cuda_elg_set_host + procedure, pass(a) :: set_dev => i_cuda_elg_set_dev + procedure, pass(a) :: set_sync => i_cuda_elg_set_sync + procedure, pass(a) :: sync => i_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_i_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_i_cuda_elg_asb + final :: i_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_i_elg_mold - procedure, pass(a) :: asb => psb_i_elg_asb + procedure, pass(a) :: mold => psb_i_cuda_elg_mold + procedure, pass(a) :: asb => psb_i_cuda_elg_asb #endif - end type psb_i_elg_sparse_mat + end type psb_i_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: i_elg_get_nzeros, i_elg_free, i_elg_get_fmt, & - & i_elg_get_size, i_elg_sizeof, i_elg_get_nz_row, i_elg_sync + private :: i_cuda_elg_get_nzeros, i_cuda_elg_free, i_cuda_elg_get_fmt, & + & i_cuda_elg_get_size, i_cuda_elg_sizeof, i_cuda_elg_get_nz_row, i_cuda_elg_sync interface - subroutine psb_i_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_vect_mv + end subroutine psb_i_cuda_elg_vect_mv end interface interface - subroutine psb_i_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_inner_vect_sv + end subroutine psb_i_cuda_elg_inner_vect_sv end interface interface - subroutine psb_i_elg_reallocate_nz(nz,a) - import :: psb_i_elg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_elg_reallocate_nz(nz,a) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_elg_reallocate_nz + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_reallocate_nz end interface interface - subroutine psb_i_elg_allocate_mnnz(m,n,a,nz) - import :: psb_i_elg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_elg_allocate_mnnz + end subroutine psb_i_cuda_elg_allocate_mnnz end interface interface - subroutine psb_i_elg_mold(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_mold(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_mold + end subroutine psb_i_cuda_elg_mold end interface interface - subroutine psb_i_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_csput_a + end subroutine psb_i_cuda_elg_csput_a end interface interface - subroutine psb_i_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_i_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_i_base_vect_type,& + subroutine psb_i_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_i_base_vect_type,& & psb_i_base_vect_type - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_csput_v + end subroutine psb_i_cuda_elg_csput_v end interface interface - subroutine psb_i_elg_from_gpu(a,info) - import :: psb_i_elg_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_from_gpu(a,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_from_gpu + end subroutine psb_i_cuda_elg_from_gpu end interface interface - subroutine psb_i_elg_to_gpu(a,info, nzrm) - import :: psb_i_elg_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_elg_to_gpu + end subroutine psb_i_cuda_elg_to_gpu end interface interface - subroutine psb_i_cp_elg_from_coo(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_elg_from_coo(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_elg_from_coo + end subroutine psb_i_cuda_cp_elg_from_coo end interface interface - subroutine psb_i_cp_elg_from_fmt(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_elg_from_fmt + end subroutine psb_i_cuda_cp_elg_from_fmt end interface interface - subroutine psb_i_mv_elg_from_coo(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_elg_from_coo(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_elg_from_coo + end subroutine psb_i_cuda_mv_elg_from_coo end interface interface - subroutine psb_i_mv_elg_from_fmt(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_elg_from_fmt + end subroutine psb_i_cuda_mv_elg_from_fmt end interface interface - subroutine psb_i_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_csmv + end subroutine psb_i_cuda_elg_csmv end interface interface - subroutine psb_i_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_csmm + end subroutine psb_i_cuda_elg_csmm end interface interface - subroutine psb_i_elg_scal(d,a,info, side) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_scal(d,a,info, side) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_elg_scal + end subroutine psb_i_cuda_elg_scal end interface interface - subroutine psb_i_elg_scals(d,a,info) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_scals(d,a,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_scals + end subroutine psb_i_cuda_elg_scals end interface interface - subroutine psb_i_elg_asb(a) - import :: psb_i_elg_sparse_mat - class(psb_i_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_elg_asb + subroutine psb_i_cuda_elg_asb(a) + import :: psb_i_cuda_elg_sparse_mat + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function i_elg_sizeof(a) result(res) + function i_cuda_elg_sizeof(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_elg_sizeof + end function i_cuda_elg_sizeof - function i_elg_get_fmt() result(res) + function i_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function i_elg_get_fmt + end function i_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine i_elg_reinit(a,clear) + subroutine i_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine i_elg_reinit + end subroutine i_cuda_elg_reinit - subroutine i_elg_free(a) + subroutine i_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine i_elg_free + end subroutine i_cuda_elg_free - subroutine i_elg_sync(a) + subroutine i_cuda_elg_sync(a) implicit none - class(psb_i_elg_sparse_mat), target, intent(in) :: a - class(psb_i_elg_sparse_mat), pointer :: tmpa + class(psb_i_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine i_elg_sync + end subroutine i_cuda_elg_sync - subroutine i_elg_set_host(a) + subroutine i_cuda_elg_set_host(a) implicit none - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine i_elg_set_host + end subroutine i_cuda_elg_set_host - subroutine i_elg_set_dev(a) + subroutine i_cuda_elg_set_dev(a) implicit none - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine i_elg_set_dev + end subroutine i_cuda_elg_set_dev - subroutine i_elg_set_sync(a) + subroutine i_cuda_elg_set_sync(a) implicit none - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine i_elg_set_sync + end subroutine i_cuda_elg_set_sync - function i_elg_is_dev(a) result(res) + function i_cuda_elg_is_dev(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function i_elg_is_dev + end function i_cuda_elg_is_dev - function i_elg_is_host(a) result(res) + function i_cuda_elg_is_host(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function i_elg_is_host + end function i_cuda_elg_is_host - function i_elg_is_sync(a) result(res) + function i_cuda_elg_is_sync(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function i_elg_is_sync + end function i_cuda_elg_is_sync - subroutine i_elg_finalize(a) + subroutine i_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_i_elg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_elg_finalize + end subroutine i_cuda_elg_finalize #else interface - subroutine psb_i_elg_asb(a) - import :: psb_i_elg_sparse_mat - class(psb_i_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_elg_asb + subroutine psb_i_cuda_elg_asb(a) + import :: psb_i_cuda_elg_sparse_mat + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_asb end interface interface - subroutine psb_i_elg_mold(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_mold(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_mold + end subroutine psb_i_cuda_elg_mold end interface #endif -end module psb_i_elg_mat_mod +end module psb_i_cuda_elg_mat_mod diff --git a/cuda/psb_i_hdiag_mat_mod.F90 b/cuda/psb_i_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_i_hdiag_mat_mod.F90 rename to cuda/psb_i_cuda_hdiag_mat_mod.F90 index a42030b8..03ff573b 100644 --- a/cuda/psb_i_hdiag_mat_mod.F90 +++ b/cuda/psb_i_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_i_hdiag_mat_mod +module psb_i_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_i_hdia_mat_mod - type, extends(psb_i_hdia_sparse_mat) :: psb_i_hdiag_sparse_mat + type, extends(psb_i_hdia_sparse_mat) :: psb_i_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => i_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => i_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_i_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_i_hdiag_csmm - procedure, pass(a) :: csmv => psb_i_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_i_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_i_hdiag_scals - ! procedure, pass(a) :: scalv => psb_i_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_i_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_i_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => i_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_i_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_i_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_i_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_i_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_i_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_i_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_i_mv_hdiag_from_fmt - procedure, pass(a) :: free => i_hdiag_free - procedure, pass(a) :: mold => psb_i_hdiag_mold - procedure, pass(a) :: to_gpu => psb_i_hdiag_to_gpu - final :: i_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => i_cuda_hdiag_free + procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_hdiag_to_gpu + final :: i_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_i_hdiag_mold + procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold #endif - end type psb_i_hdiag_sparse_mat + end type psb_i_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: i_hdiag_get_nzeros, i_hdiag_free, i_hdiag_get_fmt, & - & i_hdiag_get_size, i_hdiag_sizeof, i_hdiag_get_nz_row + private :: i_cuda_hdiag_get_nzeros, i_cuda_hdiag_free, i_cuda_hdiag_get_fmt, & + & i_cuda_hdiag_get_size, i_cuda_hdiag_sizeof, i_cuda_hdiag_get_nz_row interface - subroutine psb_i_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hdiag_vect_mv + end subroutine psb_i_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_i_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type -!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta !!$ class(psb_i_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_hdiag_inner_vect_sv +!!$ end subroutine psb_i_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_i_hdiag_reallocate_nz +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_i_hdiag_allocate_mnnz +!!$ end subroutine psb_i_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_i_hdiag_mold(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_mold(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hdiag_mold + end subroutine psb_i_cuda_hdiag_mold end interface interface - subroutine psb_i_hdiag_to_gpu(a,info) - import :: psb_i_hdiag_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hdiag_to_gpu(a,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hdiag_to_gpu + end subroutine psb_i_cuda_hdiag_to_gpu end interface interface - subroutine psb_i_cp_hdiag_from_coo(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hdiag_from_coo + end subroutine psb_i_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_i_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_i_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_cp_hdiag_from_fmt +!!$ end subroutine psb_i_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_i_mv_hdiag_from_coo(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hdiag_from_coo + end subroutine psb_i_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_i_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_i_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_mv_hdiag_from_fmt +!!$ end subroutine psb_i_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_i_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hdiag_csmv + end subroutine psb_i_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_i_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) !!$ integer(psb_ipk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_hdiag_csmm +!!$ end subroutine psb_i_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_scal(d,a,info, side) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_i_hdiag_scal +!!$ end subroutine psb_i_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_scals(d,a,info) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_hdiag_scals +!!$ end subroutine psb_i_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function i_hdiag_get_fmt() result(res) + function i_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function i_hdiag_get_fmt + end function i_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine i_hdiag_free(a) + subroutine i_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine i_hdiag_free + end subroutine i_cuda_hdiag_free - subroutine i_hdiag_finalize(a) + subroutine i_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_i_hdiag_sparse_mat), intent(inout) :: a + type(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_i_hdia_sparse_mat%free() return - end subroutine i_hdiag_finalize + end subroutine i_cuda_hdiag_finalize #else interface - subroutine psb_i_hdiag_mold(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_mold(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hdiag_mold + end subroutine psb_i_cuda_hdiag_mold end interface #endif -end module psb_i_hdiag_mat_mod +end module psb_i_cuda_hdiag_mat_mod diff --git a/cuda/psb_i_hlg_mat_mod.F90 b/cuda/psb_i_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_i_hlg_mat_mod.F90 rename to cuda/psb_i_cuda_hlg_mat_mod.F90 index 2ec881ce..f97470d2 100644 --- a/cuda/psb_i_hlg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_i_hlg_mat_mod +module psb_i_cuda_hlg_mat_mod use iso_c_binding use psb_i_mat_mod @@ -41,7 +41,7 @@ module psb_i_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_hll_sparse_mat) :: psb_i_hlg_sparse_mat + type, extends(psb_i_hll_sparse_mat) :: psb_i_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_i_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => i_hlg_get_fmt - procedure, pass(a) :: sizeof => i_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_i_hlg_vect_mv - procedure, pass(a) :: csmm => psb_i_hlg_csmm - procedure, pass(a) :: csmv => psb_i_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_i_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_i_hlg_scals - procedure, pass(a) :: scalv => psb_i_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_i_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_hlg_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_i_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_i_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_hlg_from_fmt - procedure, pass(a) :: free => i_hlg_free - procedure, pass(a) :: mold => psb_i_hlg_mold - procedure, pass(a) :: is_host => i_hlg_is_host - procedure, pass(a) :: is_dev => i_hlg_is_dev - procedure, pass(a) :: is_sync => i_hlg_is_sync - procedure, pass(a) :: set_host => i_hlg_set_host - procedure, pass(a) :: set_dev => i_hlg_set_dev - procedure, pass(a) :: set_sync => i_hlg_set_sync - procedure, pass(a) :: sync => i_hlg_sync - procedure, pass(a) :: from_gpu => psb_i_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_i_hlg_to_gpu - final :: i_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => i_cuda_hlg_free + procedure, pass(a) :: mold => psb_i_cuda_hlg_mold + procedure, pass(a) :: is_host => i_cuda_hlg_is_host + procedure, pass(a) :: is_dev => i_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => i_cuda_hlg_is_sync + procedure, pass(a) :: set_host => i_cuda_hlg_set_host + procedure, pass(a) :: set_dev => i_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => i_cuda_hlg_set_sync + procedure, pass(a) :: sync => i_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_i_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_cuda_hlg_to_gpu + final :: i_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_i_hlg_mold + procedure, pass(a) :: mold => psb_i_cuda_hlg_mold #endif - end type psb_i_hlg_sparse_mat + end type psb_i_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: i_hlg_get_nzeros, i_hlg_free, i_hlg_get_fmt, & - & i_hlg_get_size, i_hlg_sizeof, i_hlg_get_nz_row + private :: i_cuda_hlg_get_nzeros, i_cuda_hlg_free, i_cuda_hlg_get_fmt, & + & i_cuda_hlg_get_size, i_cuda_hlg_sizeof, i_cuda_hlg_get_nz_row interface - subroutine psb_i_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_vect_mv + end subroutine psb_i_cuda_hlg_vect_mv end interface interface - subroutine psb_i_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_inner_vect_sv + end subroutine psb_i_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_i_hlg_reallocate_nz(nz,a) - import :: psb_i_hlg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hlg_reallocate_nz(nz,a) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_i_hlg_reallocate_nz + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_hlg_reallocate_nz end interface interface - subroutine psb_i_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_i_hlg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_hlg_allocate_mnnz + end subroutine psb_i_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_i_hlg_mold(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_mold(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_mold + end subroutine psb_i_cuda_hlg_mold end interface interface - subroutine psb_i_hlg_from_gpu(a,info) - import :: psb_i_hlg_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_from_gpu(a,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_from_gpu + end subroutine psb_i_cuda_hlg_from_gpu end interface interface - subroutine psb_i_hlg_to_gpu(a,info, nzrm) - import :: psb_i_hlg_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_hlg_to_gpu + end subroutine psb_i_cuda_hlg_to_gpu end interface interface - subroutine psb_i_cp_hlg_from_coo(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hlg_from_coo + end subroutine psb_i_cuda_cp_hlg_from_coo end interface interface - subroutine psb_i_cp_hlg_from_fmt(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hlg_from_fmt + end subroutine psb_i_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_i_mv_hlg_from_coo(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hlg_from_coo + end subroutine psb_i_cuda_mv_hlg_from_coo end interface interface - subroutine psb_i_mv_hlg_from_fmt(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hlg_from_fmt + end subroutine psb_i_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_i_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_csmv + end subroutine psb_i_cuda_hlg_csmv end interface interface - subroutine psb_i_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_csmm + end subroutine psb_i_cuda_hlg_csmm end interface interface - subroutine psb_i_hlg_scal(d,a,info, side) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_scal(d,a,info, side) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_hlg_scal + end subroutine psb_i_cuda_hlg_scal end interface interface - subroutine psb_i_hlg_scals(d,a,info) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_scals(d,a,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_scals + end subroutine psb_i_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function i_hlg_sizeof(a) result(res) + function i_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_hlg_sizeof + end function i_cuda_hlg_sizeof - function i_hlg_get_fmt() result(res) + function i_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function i_hlg_get_fmt + end function i_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine i_hlg_free(a) + subroutine i_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine i_hlg_free + end subroutine i_cuda_hlg_free - subroutine i_hlg_sync(a) + subroutine i_cuda_hlg_sync(a) implicit none - class(psb_i_hlg_sparse_mat), target, intent(in) :: a - class(psb_i_hlg_sparse_mat), pointer :: tmpa + class(psb_i_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine i_hlg_sync + end subroutine i_cuda_hlg_sync - subroutine i_hlg_set_host(a) + subroutine i_cuda_hlg_set_host(a) implicit none - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine i_hlg_set_host + end subroutine i_cuda_hlg_set_host - subroutine i_hlg_set_dev(a) + subroutine i_cuda_hlg_set_dev(a) implicit none - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine i_hlg_set_dev + end subroutine i_cuda_hlg_set_dev - subroutine i_hlg_set_sync(a) + subroutine i_cuda_hlg_set_sync(a) implicit none - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine i_hlg_set_sync + end subroutine i_cuda_hlg_set_sync - function i_hlg_is_dev(a) result(res) + function i_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function i_hlg_is_dev + end function i_cuda_hlg_is_dev - function i_hlg_is_host(a) result(res) + function i_cuda_hlg_is_host(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function i_hlg_is_host + end function i_cuda_hlg_is_host - function i_hlg_is_sync(a) result(res) + function i_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function i_hlg_is_sync + end function i_cuda_hlg_is_sync - subroutine i_hlg_finalize(a) + subroutine i_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_i_hlg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_hlg_finalize + end subroutine i_cuda_hlg_finalize #else interface - subroutine psb_i_hlg_mold(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_mold(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_mold + end subroutine psb_i_cuda_hlg_mold end interface #endif -end module psb_i_hlg_mat_mod +end module psb_i_cuda_hlg_mat_mod diff --git a/cuda/psb_i_hybg_mat_mod.F90 b/cuda/psb_i_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_i_hybg_mat_mod.F90 rename to cuda/psb_i_cuda_hybg_mat_mod.F90 index 388a8801..10333c24 100644 --- a/cuda/psb_i_hybg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_i_hybg_mat_mod +module psb_i_cuda_hybg_mat_mod use iso_c_binding use psb_i_mat_mod use cusparse_mod - type, extends(psb_i_csr_sparse_mat) :: psb_i_hybg_sparse_mat + type, extends(psb_i_csr_sparse_mat) :: psb_i_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_i_hybg_mat_mod type(i_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => i_hybg_get_fmt - procedure, pass(a) :: sizeof => i_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_i_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_i_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_i_hybg_csmm - procedure, pass(a) :: csmv => psb_i_hybg_csmv - procedure, pass(a) :: scals => psb_i_hybg_scals - procedure, pass(a) :: scalv => psb_i_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_i_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_hybg_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_i_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_i_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_hybg_from_fmt - procedure, pass(a) :: free => i_hybg_free - procedure, pass(a) :: mold => psb_i_hybg_mold - procedure, pass(a) :: to_gpu => psb_i_hybg_to_gpu - final :: i_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => i_cuda_hybg_free + procedure, pass(a) :: mold => psb_i_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_hybg_to_gpu + final :: i_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_i_hybg_mold + procedure, pass(a) :: mold => psb_i_cuda_hybg_mold #endif - end type psb_i_hybg_sparse_mat + end type psb_i_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: i_hybg_get_nzeros, i_hybg_free, i_hybg_get_fmt, & - & i_hybg_get_size, i_hybg_sizeof, i_hybg_get_nz_row + private :: i_cuda_hybg_get_nzeros, i_cuda_hybg_free, i_cuda_hybg_get_fmt, & + & i_cuda_hybg_get_size, i_cuda_hybg_sizeof, i_cuda_hybg_get_nz_row interface - subroutine psb_i_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_inner_vect_sv + end subroutine psb_i_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_i_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_vect_mv + end subroutine psb_i_cuda_hybg_vect_mv end interface interface - subroutine psb_i_hybg_reallocate_nz(nz,a) - import :: psb_i_hybg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hybg_reallocate_nz(nz,a) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_i_hybg_reallocate_nz + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_hybg_reallocate_nz end interface interface - subroutine psb_i_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_i_hybg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_hybg_allocate_mnnz + end subroutine psb_i_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_i_hybg_mold(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_mold(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hybg_mold + end subroutine psb_i_cuda_hybg_mold end interface interface - subroutine psb_i_hybg_to_gpu(a,info, nzrm) - import :: psb_i_hybg_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_hybg_to_gpu + end subroutine psb_i_cuda_hybg_to_gpu end interface interface - subroutine psb_i_cp_hybg_from_coo(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hybg_from_coo + end subroutine psb_i_cuda_cp_hybg_from_coo end interface interface - subroutine psb_i_cp_hybg_from_fmt(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hybg_from_fmt + end subroutine psb_i_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_i_mv_hybg_from_coo(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hybg_from_coo + end subroutine psb_i_cuda_mv_hybg_from_coo end interface interface - subroutine psb_i_mv_hybg_from_fmt(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hybg_from_fmt + end subroutine psb_i_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_i_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_csmv + end subroutine psb_i_cuda_hybg_csmv end interface interface - subroutine psb_i_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_csmm + end subroutine psb_i_cuda_hybg_csmm end interface interface - subroutine psb_i_hybg_scal(d,a,info,side) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hybg_scal(d,a,info,side) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_hybg_scal + end subroutine psb_i_cuda_hybg_scal end interface interface - subroutine psb_i_hybg_scals(d,a,info) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hybg_scals(d,a,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hybg_scals + end subroutine psb_i_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function i_hybg_sizeof(a) result(res) + function i_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_i_hybg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + psb_sizeof_ip * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_hybg_sizeof + end function i_cuda_hybg_sizeof - function i_hybg_get_fmt() result(res) + function i_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function i_hybg_get_fmt + end function i_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine i_hybg_free(a) + subroutine i_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_i_csr_sparse_mat%free() return - end subroutine i_hybg_free + end subroutine i_cuda_hybg_free - subroutine i_hybg_finalize(a) + subroutine i_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_i_hybg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine i_hybg_finalize + end subroutine i_cuda_hybg_finalize #else interface - subroutine psb_i_hybg_mold(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_mold(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hybg_mold + end subroutine psb_i_cuda_hybg_mold end interface #endif -end module psb_i_hybg_mat_mod +end module psb_i_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_i_gpu_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_i_gpu_vect_mod.F90 rename to cuda/psb_i_cuda_vect_mod.F90 index ca4950a0..8d940513 100644 --- a/cuda/psb_i_gpu_vect_mod.F90 +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_i_gpu_vect_mod +module psb_i_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod + use psb_cuda_env_mod use psb_i_vectordev_mod #endif @@ -44,7 +44,7 @@ module psb_i_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_base_vect_type) :: psb_i_vect_gpu + type, extends(psb_i_base_vect_type) :: psb_i_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -56,52 +56,52 @@ module psb_i_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => i_gpu_get_nrows - procedure, nopass :: get_fmt => i_gpu_get_fmt - - procedure, pass(x) :: all => i_gpu_all - procedure, pass(x) :: zero => i_gpu_zero - procedure, pass(x) :: asb_m => i_gpu_asb_m - procedure, pass(x) :: sync => i_gpu_sync - procedure, pass(x) :: sync_space => i_gpu_sync_space - procedure, pass(x) :: bld_x => i_gpu_bld_x - procedure, pass(x) :: bld_mn => i_gpu_bld_mn - procedure, pass(x) :: free => i_gpu_free - procedure, pass(x) :: ins_a => i_gpu_ins_a - procedure, pass(x) :: ins_v => i_gpu_ins_v - procedure, pass(x) :: is_host => i_gpu_is_host - procedure, pass(x) :: is_dev => i_gpu_is_dev - procedure, pass(x) :: is_sync => i_gpu_is_sync - procedure, pass(x) :: set_host => i_gpu_set_host - procedure, pass(x) :: set_dev => i_gpu_set_dev - procedure, pass(x) :: set_sync => i_gpu_set_sync - procedure, pass(x) :: set_scal => i_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => i_gpu_set_vect - procedure, pass(x) :: gthzv_x => i_gpu_gthzv_x - procedure, pass(y) :: sctb => i_gpu_sctb - procedure, pass(y) :: sctb_x => i_gpu_sctb_x - procedure, pass(x) :: gthzbuf => i_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => i_gpu_sctb_buf - procedure, pass(x) :: new_buffer => i_gpu_new_buffer - procedure, nopass :: device_wait => i_gpu_device_wait - procedure, pass(x) :: free_buffer => i_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => i_gpu_maybe_free_buffer - - final :: i_gpu_vect_finalize + procedure, pass(x) :: get_nrows => i_cuda_get_nrows + procedure, nopass :: get_fmt => i_cuda_get_fmt + + procedure, pass(x) :: all => i_cuda_all + procedure, pass(x) :: zero => i_cuda_zero + procedure, pass(x) :: asb_m => i_cuda_asb_m + procedure, pass(x) :: sync => i_cuda_sync + procedure, pass(x) :: sync_space => i_cuda_sync_space + procedure, pass(x) :: bld_x => i_cuda_bld_x + procedure, pass(x) :: bld_mn => i_cuda_bld_mn + procedure, pass(x) :: free => i_cuda_free + procedure, pass(x) :: ins_a => i_cuda_ins_a + procedure, pass(x) :: ins_v => i_cuda_ins_v + procedure, pass(x) :: is_host => i_cuda_is_host + procedure, pass(x) :: is_dev => i_cuda_is_dev + procedure, pass(x) :: is_sync => i_cuda_is_sync + procedure, pass(x) :: set_host => i_cuda_set_host + procedure, pass(x) :: set_dev => i_cuda_set_dev + procedure, pass(x) :: set_sync => i_cuda_set_sync + procedure, pass(x) :: set_scal => i_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => i_cuda_set_vect + procedure, pass(x) :: gthzv_x => i_cuda_gthzv_x + procedure, pass(y) :: sctb => i_cuda_sctb + procedure, pass(y) :: sctb_x => i_cuda_sctb_x + procedure, pass(x) :: gthzbuf => i_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => i_cuda_sctb_buf + procedure, pass(x) :: new_buffer => i_cuda_new_buffer + procedure, nopass :: device_wait => i_cuda_device_wait + procedure, pass(x) :: free_buffer => i_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => i_cuda_maybe_free_buffer + + final :: i_cuda_vect_finalize #endif - end type psb_i_vect_gpu + end type psb_i_vect_cuda - public :: psb_i_vect_gpu_ + public :: psb_i_vect_cuda_ private :: constructor - interface psb_i_vect_gpu_ + interface psb_i_vect_cuda_ module procedure constructor - end interface psb_i_vect_gpu_ + end interface psb_i_vect_cuda_ contains function constructor(x) result(this) integer(psb_ipk_) :: x(:) - type(psb_i_vect_gpu) :: this + type(psb_i_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -111,20 +111,20 @@ contains #ifdef HAVE_SPGPU - subroutine i_gpu_device_wait() + subroutine i_cuda_device_wait() call psb_cudaSync() - end subroutine i_gpu_device_wait + end subroutine i_cuda_device_wait - subroutine i_gpu_new_buffer(n,x,info) + subroutine i_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -314,14 +314,14 @@ contains end select - end subroutine i_gpu_gthzv_x + end subroutine i_cuda_gthzv_x - subroutine i_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine i_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_i_vect_gpu) :: x + class(psb_i_vect_cuda) :: x integer :: info, ni info = 0 @@ -332,11 +332,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -367,14 +367,14 @@ contains end select - end subroutine i_gpu_gthzbuf + end subroutine i_cuda_gthzbuf - subroutine i_gpu_sctb(n,idx,x,beta,y) + subroutine i_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_gpu) :: y + class(psb_i_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -384,24 +384,24 @@ contains call y%psb_i_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine i_gpu_sctb + end subroutine i_cuda_sctb - subroutine i_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine i_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_gpu) :: y + class(psb_i_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -489,16 +489,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine i_gpu_sctb_x + end subroutine i_cuda_sctb_x - subroutine i_gpu_sctb_buf(i,n,idx,beta,y) + subroutine i_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta - class(psb_i_vect_gpu) :: y + class(psb_i_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -509,11 +509,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -540,106 +540,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine i_gpu_sctb_buf + end subroutine i_cuda_sctb_buf - subroutine i_gpu_bld_x(x,this) + subroutine i_cuda_bld_x(x,this) use psb_base_mod integer(psb_ipk_), intent(in) :: this(:) - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_bld_x',& + call psb_errpush(info,'i_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine i_gpu_bld_x + end subroutine i_cuda_bld_x - subroutine i_gpu_bld_mn(x,n) + subroutine i_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'i_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'i_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine i_gpu_bld_mn + end subroutine i_cuda_bld_mn - subroutine i_gpu_set_host(x) + subroutine i_cuda_set_host(x) implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine i_gpu_set_host + end subroutine i_cuda_set_host - subroutine i_gpu_set_dev(x) + subroutine i_cuda_set_dev(x) implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine i_gpu_set_dev + end subroutine i_cuda_set_dev - subroutine i_gpu_set_sync(x) + subroutine i_cuda_set_sync(x) implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine i_gpu_set_sync + end subroutine i_cuda_set_sync - function i_gpu_is_dev(x) result(res) + function i_cuda_is_dev(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function i_gpu_is_dev + end function i_cuda_is_dev - function i_gpu_is_host(x) result(res) + function i_cuda_is_host(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function i_gpu_is_host + end function i_cuda_is_host - function i_gpu_is_sync(x) result(res) + function i_cuda_is_sync(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function i_gpu_is_sync + end function i_cuda_is_sync - function i_gpu_get_nrows(x) result(res) + function i_cuda_get_nrows(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function i_gpu_get_nrows + end function i_cuda_get_nrows - function i_gpu_get_fmt() result(res) + function i_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'iGPU' - end function i_gpu_get_fmt + end function i_cuda_get_fmt - subroutine i_gpu_all(n, x, info) + subroutine i_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_i_vect_gpu), intent(out) :: x + class(psb_i_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -647,26 +647,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_all',& + call psb_errpush(info,'i_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine i_gpu_all + end subroutine i_cuda_all - subroutine i_gpu_zero(x) + subroutine i_cuda_zero(x) use psi_serial_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=izero call x%set_host() - end subroutine i_gpu_zero + end subroutine i_cuda_zero - subroutine i_gpu_asb_m(n, x, info) + subroutine i_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -686,12 +686,12 @@ contains end if end if - end subroutine i_gpu_asb_m + end subroutine i_cuda_asb_m - subroutine i_gpu_sync_space(x,info) + subroutine i_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -730,12 +730,12 @@ contains end if end if - end subroutine i_gpu_sync_space + end subroutine i_cuda_sync_space - subroutine i_gpu_sync(x) + subroutine i_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -761,31 +761,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'i_gpu_sync') + call psb_errpush(info,'i_cuda_sync') end if - end subroutine i_gpu_sync + end subroutine i_cuda_sync - subroutine i_gpu_free(x, info) + subroutine i_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine i_gpu_free + end subroutine i_cuda_free - subroutine i_gpu_set_scal(x,val,first,last) - class(psb_i_vect_gpu), intent(inout) :: x + subroutine i_cuda_set_scal(x,val,first,last) + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -800,10 +800,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine i_gpu_set_scal + end subroutine i_cuda_set_scal !!$ -!!$ subroutine i_gpu_set_vect(x,val) -!!$ class(psb_i_vect_gpu), intent(inout) :: x +!!$ subroutine i_cuda_set_vect(x,val) +!!$ class(psb_i_vect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -812,68 +812,68 @@ contains !!$ call x%psb_i_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine i_gpu_set_vect +!!$ end subroutine i_cuda_set_vect - subroutine i_gpu_vect_finalize(x) + subroutine i_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_i_vect_gpu), intent(inout) :: x + type(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine i_gpu_vect_finalize + end subroutine i_cuda_vect_finalize - subroutine i_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine i_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_i_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceInt(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine i_gpu_ins_v + end subroutine i_cuda_ins_v - subroutine i_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine i_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) integer(psb_ipk_), intent(in) :: val(:) @@ -886,11 +886,11 @@ contains call x%psb_i_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine i_gpu_ins_a + end subroutine i_cuda_ins_a #endif -end module psb_i_gpu_vect_mod +end module psb_i_cuda_vect_mod ! @@ -899,7 +899,7 @@ end module psb_i_gpu_vect_mod -module psb_i_gpu_multivect_mod +module psb_i_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -914,7 +914,7 @@ module psb_i_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_base_multivect_type) :: psb_i_multivect_gpu + type, extends(psb_i_base_multivect_type) :: psb_i_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -922,48 +922,48 @@ module psb_i_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => i_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => i_gpu_multi_get_ncols - procedure, nopass :: get_fmt => i_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => i_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => i_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => i_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => i_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => i_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => i_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => i_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => i_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => i_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => i_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => i_gpu_multi_amax -!!$ procedure, pass(x) :: asum => i_gpu_multi_asum - procedure, pass(x) :: all => i_gpu_multi_all - procedure, pass(x) :: zero => i_gpu_multi_zero - procedure, pass(x) :: asb => i_gpu_multi_asb - procedure, pass(x) :: sync => i_gpu_multi_sync - procedure, pass(x) :: sync_space => i_gpu_multi_sync_space - procedure, pass(x) :: bld_x => i_gpu_multi_bld_x - procedure, pass(x) :: bld_n => i_gpu_multi_bld_n - procedure, pass(x) :: free => i_gpu_multi_free - procedure, pass(x) :: ins => i_gpu_multi_ins - procedure, pass(x) :: is_host => i_gpu_multi_is_host - procedure, pass(x) :: is_dev => i_gpu_multi_is_dev - procedure, pass(x) :: is_sync => i_gpu_multi_is_sync - procedure, pass(x) :: set_host => i_gpu_multi_set_host - procedure, pass(x) :: set_dev => i_gpu_multi_set_dev - procedure, pass(x) :: set_sync => i_gpu_multi_set_sync - procedure, pass(x) :: set_scal => i_gpu_multi_set_scal - procedure, pass(x) :: set_vect => i_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => i_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => i_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => i_gpu_multi_sctb_x - final :: i_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => i_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => i_cuda_multi_get_ncols + procedure, nopass :: get_fmt => i_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => i_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => i_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => i_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => i_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => i_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => i_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => i_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => i_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => i_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => i_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => i_cuda_multi_amax +!!$ procedure, pass(x) :: asum => i_cuda_multi_asum + procedure, pass(x) :: all => i_cuda_multi_all + procedure, pass(x) :: zero => i_cuda_multi_zero + procedure, pass(x) :: asb => i_cuda_multi_asb + procedure, pass(x) :: sync => i_cuda_multi_sync + procedure, pass(x) :: sync_space => i_cuda_multi_sync_space + procedure, pass(x) :: bld_x => i_cuda_multi_bld_x + procedure, pass(x) :: bld_n => i_cuda_multi_bld_n + procedure, pass(x) :: free => i_cuda_multi_free + procedure, pass(x) :: ins => i_cuda_multi_ins + procedure, pass(x) :: is_host => i_cuda_multi_is_host + procedure, pass(x) :: is_dev => i_cuda_multi_is_dev + procedure, pass(x) :: is_sync => i_cuda_multi_is_sync + procedure, pass(x) :: set_host => i_cuda_multi_set_host + procedure, pass(x) :: set_dev => i_cuda_multi_set_dev + procedure, pass(x) :: set_sync => i_cuda_multi_set_sync + procedure, pass(x) :: set_scal => i_cuda_multi_set_scal + procedure, pass(x) :: set_vect => i_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => i_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => i_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => i_cuda_multi_sctb_x + final :: i_cuda_multi_vect_finalize #endif - end type psb_i_multivect_gpu + end type psb_i_multivect_cuda - public :: psb_i_multivect_gpu + public :: psb_i_multivect_cuda private :: constructor - interface psb_i_multivect_gpu + interface psb_i_multivect_cuda module procedure constructor end interface @@ -971,7 +971,7 @@ contains function constructor(x) result(this) integer(psb_ipk_) :: x(:,:) - type(psb_i_multivect_gpu) :: this + type(psb_i_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -981,15 +981,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine i_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ integer(psb_ipk_) :: y(:) -!!$ class(psb_i_multivect_gpu) :: x +!!$ class(psb_i_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1014,16 +1014,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine i_gpu_multi_gthzv_x +!!$ end subroutine i_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine i_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine i_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ integer(psb_ipk_) :: beta, x(:) -!!$ class(psb_i_multivect_gpu) :: y +!!$ class(psb_i_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1033,17 +1033,17 @@ contains !!$ call y%psb_i_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine i_gpu_multi_sctb +!!$ end subroutine i_cuda_multi_sctb !!$ -!!$ subroutine i_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine i_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ integer(psb_ipk_) :: beta, x(:) -!!$ class(psb_i_multivect_gpu) :: y +!!$ class(psb_i_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1069,13 +1069,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine i_gpu_multi_sctb_x +!!$ end subroutine i_cuda_multi_sctb_x - subroutine i_gpu_multi_bld_x(x,this) + subroutine i_cuda_multi_bld_x(x,this) use psb_base_mod integer(psb_ipk_), intent(in) :: this(:,:) - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1085,101 +1085,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_multi_bld_x',& + call psb_errpush(info,'i_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine i_gpu_multi_bld_x + end subroutine i_cuda_multi_bld_x - subroutine i_gpu_multi_bld_n(x,m,n) + subroutine i_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'i_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'i_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine i_gpu_multi_bld_n + end subroutine i_cuda_multi_bld_n - subroutine i_gpu_multi_set_host(x) + subroutine i_cuda_multi_set_host(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine i_gpu_multi_set_host + end subroutine i_cuda_multi_set_host - subroutine i_gpu_multi_set_dev(x) + subroutine i_cuda_multi_set_dev(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine i_gpu_multi_set_dev + end subroutine i_cuda_multi_set_dev - subroutine i_gpu_multi_set_sync(x) + subroutine i_cuda_multi_set_sync(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine i_gpu_multi_set_sync + end subroutine i_cuda_multi_set_sync - function i_gpu_multi_is_dev(x) result(res) + function i_cuda_multi_is_dev(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function i_gpu_multi_is_dev + end function i_cuda_multi_is_dev - function i_gpu_multi_is_host(x) result(res) + function i_cuda_multi_is_host(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function i_gpu_multi_is_host + end function i_cuda_multi_is_host - function i_gpu_multi_is_sync(x) result(res) + function i_cuda_multi_is_sync(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function i_gpu_multi_is_sync + end function i_cuda_multi_is_sync - function i_gpu_multi_get_nrows(x) result(res) + function i_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function i_gpu_multi_get_nrows + end function i_cuda_multi_get_nrows - function i_gpu_multi_get_ncols(x) result(res) + function i_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function i_gpu_multi_get_ncols + end function i_cuda_multi_get_ncols - function i_gpu_multi_get_fmt() result(res) + function i_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'iGPU' - end function i_gpu_multi_get_fmt + end function i_cuda_multi_get_fmt -!!$ function i_gpu_multi_dot_v(n,x,y) result(res) +!!$ function i_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ class(psb_i_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res @@ -1196,13 +1196,13 @@ contains !!$ type is (psb_i_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'i_gpu_multi_dot_v') +!!$ call psb_errpush(info,'i_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1211,11 +1211,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function i_gpu_multi_dot_v +!!$ end function i_cuda_multi_dot_v !!$ -!!$ function i_gpu_multi_dot_a(n,x,y) result(res) +!!$ function i_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res @@ -1224,14 +1224,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function i_gpu_multi_dot_a +!!$ end function i_cuda_multi_dot_a !!$ -!!$ subroutine i_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine i_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_i_base_multivect_type), intent(inout) :: x -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1244,7 +1244,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1263,27 +1263,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine i_gpu_multi_axpby_v +!!$ end subroutine i_cuda_multi_axpby_v !!$ -!!$ subroutine i_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine i_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ integer(psb_ipk_), intent(in) :: x(:) -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine i_gpu_multi_axpby_a +!!$ end subroutine i_cuda_multi_axpby_a !!$ -!!$ subroutine i_gpu_multi_mlt_v(x, y, info) +!!$ subroutine i_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_i_base_multivect_type), intent(inout) :: x -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1297,7 +1297,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1309,13 +1309,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine i_gpu_multi_mlt_v +!!$ end subroutine i_cuda_multi_mlt_v !!$ -!!$ subroutine i_gpu_multi_mlt_a(x, y, info) +!!$ subroutine i_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: x(:) -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1323,15 +1323,15 @@ contains !!$ call y%sync() !!$ call y%psb_i_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine i_gpu_multi_mlt_a +!!$ end subroutine i_cuda_multi_mlt_a !!$ -!!$ subroutine i_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine i_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: alpha,beta !!$ integer(psb_ipk_), intent(in) :: x(:) !!$ integer(psb_ipk_), intent(in) :: y(:) -!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ class(psb_i_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1339,16 +1339,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine i_gpu_multi_mlt_a_2 +!!$ end subroutine i_cuda_multi_mlt_a_2 !!$ -!!$ subroutine i_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine i_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: alpha,beta !!$ class(psb_i_base_multivect_type), intent(inout) :: x !!$ class(psb_i_base_multivect_type), intent(inout) :: y -!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ class(psb_i_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1371,9 +1371,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1393,11 +1393,11 @@ contains !!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine i_gpu_multi_mlt_v_2 +!!$ end subroutine i_cuda_multi_mlt_v_2 - subroutine i_gpu_multi_set_scal(x,val) - class(psb_i_multivect_gpu), intent(inout) :: x + subroutine i_cuda_multi_set_scal(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1405,10 +1405,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_i_base_multivect_type%set_scal(val) call x%set_host() - end subroutine i_gpu_multi_set_scal + end subroutine i_cuda_multi_set_scal - subroutine i_gpu_multi_set_vect(x,val) - class(psb_i_multivect_gpu), intent(inout) :: x + subroutine i_cuda_multi_set_vect(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1417,24 +1417,24 @@ contains call x%psb_i_base_multivect_type%set_vect(val) call x%set_host() - end subroutine i_gpu_multi_set_vect + end subroutine i_cuda_multi_set_vect -!!$ subroutine i_gpu_multi_scal(alpha, x) +!!$ subroutine i_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_i_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine i_gpu_multi_scal +!!$ end subroutine i_cuda_multi_scal !!$ !!$ -!!$ function i_gpu_multi_nrm2(n,x) result(res) +!!$ function i_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res !!$ integer(psb_ipk_) :: info @@ -1442,36 +1442,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function i_gpu_multi_nrm2 +!!$ end function i_cuda_multi_nrm2 !!$ -!!$ function i_gpu_multi_amax(n,x) result(res) +!!$ function i_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function i_gpu_multi_amax +!!$ end function i_cuda_multi_amax !!$ -!!$ function i_gpu_multi_asum(n,x) result(res) +!!$ function i_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function i_gpu_multi_asum +!!$ end function i_cuda_multi_asum - subroutine i_gpu_multi_all(m,n, x, info) + subroutine i_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_i_multivect_gpu), intent(out) :: x + class(psb_i_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=izero) @@ -1481,26 +1481,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_multi_all',& + call psb_errpush(info,'i_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine i_gpu_multi_all + end subroutine i_cuda_multi_all - subroutine i_gpu_multi_zero(x) + subroutine i_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine i_gpu_multi_zero + end subroutine i_cuda_multi_zero - subroutine i_gpu_multi_asb(m,n, x, info) + subroutine i_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1520,12 +1520,12 @@ contains call x%set_host() end if end if - end subroutine i_gpu_multi_asb + end subroutine i_cuda_multi_asb - subroutine i_gpu_multi_sync_space(x,info) + subroutine i_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1578,11 +1578,11 @@ contains end if - end subroutine i_gpu_multi_sync_space + end subroutine i_cuda_multi_sync_space - subroutine i_gpu_multi_sync(x) + subroutine i_cuda_multi_sync(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1598,16 +1598,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'i_gpu_multi_sync') + call psb_errpush(info,'i_cuda_multi_sync') end if - end subroutine i_gpu_multi_sync + end subroutine i_cuda_multi_sync - subroutine i_gpu_multi_free(x, info) + subroutine i_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1622,13 +1622,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine i_gpu_multi_free + end subroutine i_cuda_multi_free - subroutine i_gpu_multi_vect_finalize(x) + subroutine i_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_i_multivect_gpu), intent(inout) :: x + type(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1643,12 +1643,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine i_gpu_multi_vect_finalize + end subroutine i_cuda_multi_vect_finalize - subroutine i_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine i_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) integer(psb_ipk_), intent(in) :: val(:,:) @@ -1661,11 +1661,11 @@ contains call x%psb_i_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine i_gpu_multi_ins + end subroutine i_cuda_multi_ins #endif -end module psb_i_gpu_multivect_mod +end module psb_i_cuda_multivect_mod diff --git a/cuda/psb_s_csrg_mat_mod.F90 b/cuda/psb_s_csrg_mat_mod.F90 deleted file mode 100644 index cface9f5..00000000 --- a/cuda/psb_s_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_s_csrg_mat_mod - - use iso_c_binding - use psb_s_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_s_csr_sparse_mat) :: psb_s_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(s_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => s_csrg_get_fmt - procedure, pass(a) :: sizeof => s_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_s_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_s_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_csrg_csmm - procedure, pass(a) :: csmv => psb_s_csrg_csmv - procedure, pass(a) :: scals => psb_s_csrg_scals - procedure, pass(a) :: scalv => psb_s_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_s_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_csrg_from_fmt - procedure, pass(a) :: free => s_csrg_free - procedure, pass(a) :: mold => psb_s_csrg_mold - procedure, pass(a) :: is_host => s_csrg_is_host - procedure, pass(a) :: is_dev => s_csrg_is_dev - procedure, pass(a) :: is_sync => s_csrg_is_sync - procedure, pass(a) :: set_host => s_csrg_set_host - procedure, pass(a) :: set_dev => s_csrg_set_dev - procedure, pass(a) :: set_sync => s_csrg_set_sync - procedure, pass(a) :: sync => s_csrg_sync - procedure, pass(a) :: to_gpu => psb_s_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_s_csrg_from_gpu - final :: s_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_csrg_mold -#endif - end type psb_s_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: s_csrg_get_nzeros, s_csrg_free, s_csrg_get_fmt, & - & s_csrg_get_size, s_csrg_sizeof, s_csrg_get_nz_row - - - interface - subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_vect_mv - end interface - - interface - subroutine psb_s_csrg_reallocate_nz(nz,a) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_s_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_s_csrg_reallocate_nz - end interface - - interface - subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_s_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_csrg_allocate_mnnz - end interface - - interface - subroutine psb_s_csrg_mold(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_mold - end interface - - interface - subroutine psb_s_csrg_to_gpu(a,info, nzrm) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_csrg_to_gpu - end interface - - interface - subroutine psb_s_csrg_from_gpu(a,info) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_from_gpu - end interface - - interface - subroutine psb_s_cp_csrg_from_coo(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_csrg_from_coo - end interface - - interface - subroutine psb_s_cp_csrg_from_fmt(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_csrg_from_fmt - end interface - - interface - subroutine psb_s_mv_csrg_from_coo(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_csrg_from_coo - end interface - - interface - subroutine psb_s_mv_csrg_from_fmt(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_csrg_from_fmt - end interface - - interface - subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:) - real(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_csmv - end interface - interface - subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:,:) - real(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_csmm - end interface - - interface - subroutine psb_s_csrg_scal(d,a,info,side) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - real(psb_spk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_s_csrg_scal - end interface - - interface - subroutine psb_s_csrg_scals(d,a,info) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - real(psb_spk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function s_csrg_sizeof(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + psb_sizeof_sp * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function s_csrg_sizeof - - function s_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function s_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine s_csrg_set_host(a) - implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine s_csrg_set_host - - subroutine s_csrg_set_dev(a) - implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine s_csrg_set_dev - - subroutine s_csrg_set_sync(a) - implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine s_csrg_set_sync - - function s_csrg_is_dev(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function s_csrg_is_dev - - function s_csrg_is_host(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function s_csrg_is_host - - function s_csrg_is_sync(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function s_csrg_is_sync - - - subroutine s_csrg_sync(a) - implicit none - class(psb_s_csrg_sparse_mat), target, intent(in) :: a - class(psb_s_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine s_csrg_sync - - subroutine s_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_s_csr_sparse_mat%free() - - return - - end subroutine s_csrg_free - - subroutine s_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_s_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine s_csrg_finalize - -#else - interface - subroutine psb_s_csrg_mold(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_mold - end interface - -#endif - -end module psb_s_csrg_mat_mod diff --git a/cuda/psb_s_cuda_csrg_mat_mod.F90 b/cuda/psb_s_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..fb13d034 --- /dev/null +++ b/cuda/psb_s_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_cuda_csrg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_csr_sparse_mat) :: psb_s_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(s_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_s_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_s_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => s_cuda_csrg_free + procedure, pass(a) :: mold => psb_s_cuda_csrg_mold + procedure, pass(a) :: is_host => s_cuda_csrg_is_host + procedure, pass(a) :: is_dev => s_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => s_cuda_csrg_is_sync + procedure, pass(a) :: set_host => s_cuda_csrg_set_host + procedure, pass(a) :: set_dev => s_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => s_cuda_csrg_set_sync + procedure, pass(a) :: sync => s_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_s_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_s_cuda_csrg_from_gpu + final :: s_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_cuda_csrg_mold +#endif + end type psb_s_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_cuda_csrg_get_nzeros, s_cuda_csrg_free, s_cuda_csrg_get_fmt, & + & s_cuda_csrg_get_size, s_cuda_csrg_sizeof, s_cuda_csrg_get_nz_row + + + interface + subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_s_cuda_csrg_mold(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_mold + end interface + + interface + subroutine psb_s_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_s_cuda_csrg_from_gpu(a,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_csmv + end interface + interface + subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_csmm + end interface + + interface + subroutine psb_s_cuda_csrg_scal(d,a,info,side) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_cuda_csrg_scal + end interface + + interface + subroutine psb_s_cuda_csrg_scals(d,a,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_cuda_csrg_sizeof + + function s_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function s_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine s_cuda_csrg_set_host(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_cuda_csrg_set_host + + subroutine s_cuda_csrg_set_dev(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_cuda_csrg_set_dev + + subroutine s_cuda_csrg_set_sync(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_cuda_csrg_set_sync + + function s_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_cuda_csrg_is_dev + + function s_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_cuda_csrg_is_host + + function s_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_cuda_csrg_is_sync + + + subroutine s_cuda_csrg_sync(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_cuda_csrg_sync + + subroutine s_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_cuda_csrg_free + + subroutine s_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine s_cuda_csrg_finalize + +#else + interface + subroutine psb_s_cuda_csrg_mold(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_mold + end interface + +#endif + +end module psb_s_cuda_csrg_mat_mod diff --git a/cuda/psb_s_diag_mat_mod.F90 b/cuda/psb_s_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_s_diag_mat_mod.F90 rename to cuda/psb_s_cuda_diag_mat_mod.F90 index 1ed54f88..709cd728 100644 --- a/cuda/psb_s_diag_mat_mod.F90 +++ b/cuda/psb_s_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_s_diag_mat_mod +module psb_s_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_s_dia_mat_mod - type, extends(psb_s_dia_sparse_mat) :: psb_s_diag_sparse_mat + type, extends(psb_s_dia_sparse_mat) :: psb_s_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_s_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => s_diag_get_fmt - procedure, pass(a) :: sizeof => s_diag_sizeof - procedure, pass(a) :: vect_mv => psb_s_diag_vect_mv -! procedure, pass(a) :: csmm => psb_s_diag_csmm - procedure, pass(a) :: csmv => psb_s_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_s_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_s_diag_scals -! procedure, pass(a) :: scalv => psb_s_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_s_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_s_diag_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => s_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_s_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_s_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_s_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_s_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_s_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_s_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_s_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_s_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_s_mv_diag_from_fmt - procedure, pass(a) :: free => s_diag_free - procedure, pass(a) :: mold => psb_s_diag_mold - procedure, pass(a) :: to_gpu => psb_s_diag_to_gpu - final :: s_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => s_cuda_diag_free + procedure, pass(a) :: mold => psb_s_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_diag_to_gpu + final :: s_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_s_diag_mold + procedure, pass(a) :: mold => psb_s_cuda_diag_mold #endif - end type psb_s_diag_sparse_mat + end type psb_s_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: s_diag_get_nzeros, s_diag_free, s_diag_get_fmt, & - & s_diag_get_size, s_diag_sizeof, s_diag_get_nz_row + private :: s_cuda_diag_get_nzeros, s_cuda_diag_free, s_cuda_diag_get_fmt, & + & s_cuda_diag_get_size, s_cuda_diag_sizeof, s_cuda_diag_get_nz_row interface - subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_vect_mv + end subroutine psb_s_cuda_diag_vect_mv end interface interface - subroutine psb_s_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_inner_vect_sv + end subroutine psb_s_cuda_diag_inner_vect_sv end interface interface - subroutine psb_s_diag_reallocate_nz(nz,a) - import :: psb_s_diag_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_diag_reallocate_nz(nz,a) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_diag_sparse_mat), intent(inout) :: a - end subroutine psb_s_diag_reallocate_nz + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_diag_reallocate_nz end interface interface - subroutine psb_s_diag_allocate_mnnz(m,n,a,nz) - import :: psb_s_diag_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_diag_allocate_mnnz + end subroutine psb_s_cuda_diag_allocate_mnnz end interface interface - subroutine psb_s_diag_mold(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_mold(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_diag_mold + end subroutine psb_s_cuda_diag_mold end interface interface - subroutine psb_s_diag_to_gpu(a,info, nzrm) - import :: psb_s_diag_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_diag_to_gpu + end subroutine psb_s_cuda_diag_to_gpu end interface interface - subroutine psb_s_cp_diag_from_coo(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_diag_from_coo + end subroutine psb_s_cuda_cp_diag_from_coo end interface interface - subroutine psb_s_cp_diag_from_fmt(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_diag_from_fmt + end subroutine psb_s_cuda_cp_diag_from_fmt end interface interface - subroutine psb_s_mv_diag_from_coo(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_diag_from_coo + end subroutine psb_s_cuda_mv_diag_from_coo end interface interface - subroutine psb_s_mv_diag_from_fmt(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_diag_from_fmt + end subroutine psb_s_cuda_mv_diag_from_fmt end interface interface - subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_csmv + end subroutine psb_s_cuda_diag_csmv end interface interface - subroutine psb_s_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_csmm + end subroutine psb_s_cuda_diag_csmm end interface interface - subroutine psb_s_diag_scal(d,a,info, side) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_diag_scal(d,a,info, side) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_diag_scal + end subroutine psb_s_cuda_diag_scal end interface interface - subroutine psb_s_diag_scals(d,a,info) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_diag_scals(d,a,info) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_diag_scals + end subroutine psb_s_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function s_diag_sizeof(a) result(res) + function s_cuda_diag_sizeof(a) result(res) implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_diag_sizeof + end function s_cuda_diag_sizeof - function s_diag_get_fmt() result(res) + function s_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function s_diag_get_fmt + end function s_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine s_diag_free(a) + subroutine s_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine s_diag_free + end subroutine s_cuda_diag_free - subroutine s_diag_finalize(a) + subroutine s_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_s_diag_sparse_mat), intent(inout) :: a + type(psb_s_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_diag_finalize + end subroutine s_cuda_diag_finalize #else interface - subroutine psb_s_diag_mold(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_mold(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_diag_mold + end subroutine psb_s_cuda_diag_mold end interface #endif -end module psb_s_diag_mat_mod +end module psb_s_cuda_diag_mat_mod diff --git a/cuda/psb_s_dnsg_mat_mod.F90 b/cuda/psb_s_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_s_dnsg_mat_mod.F90 rename to cuda/psb_s_cuda_dnsg_mat_mod.F90 index 1c531463..b01c8365 100644 --- a/cuda/psb_s_dnsg_mat_mod.F90 +++ b/cuda/psb_s_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_s_dnsg_mat_mod +module psb_s_cuda_dnsg_mat_mod use iso_c_binding use psb_s_mat_mod use psb_s_dns_mat_mod use dnsdev_mod - type, extends(psb_s_dns_sparse_mat) :: psb_s_dnsg_sparse_mat + type, extends(psb_s_dns_sparse_mat) :: psb_s_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_s_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => s_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => s_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_s_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_s_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_s_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_s_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_s_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_s_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_s_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_s_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => s_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_s_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_s_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_s_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_s_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_s_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_s_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_s_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_s_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_dnsg_from_fmt - procedure, pass(a) :: free => s_dnsg_free - procedure, pass(a) :: mold => psb_s_dnsg_mold - procedure, pass(a) :: to_gpu => psb_s_dnsg_to_gpu - final :: s_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => s_cuda_dnsg_free + procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_dnsg_to_gpu + final :: s_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_s_dnsg_mold + procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold #endif - end type psb_s_dnsg_sparse_mat + end type psb_s_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: s_dnsg_get_nzeros, s_dnsg_free, s_dnsg_get_fmt, & - & s_dnsg_get_size, s_dnsg_get_nz_row + private :: s_cuda_dnsg_get_nzeros, s_cuda_dnsg_free, s_cuda_dnsg_get_fmt, & + & s_cuda_dnsg_get_size, s_cuda_dnsg_get_nz_row interface - subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_dnsg_vect_mv + end subroutine psb_s_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_s_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end subroutine psb_s_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_s_dnsg_mold(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_dnsg_mold(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_dnsg_mold + end subroutine psb_s_cuda_dnsg_mold end interface interface - subroutine psb_s_dnsg_to_gpu(a,info) - import :: psb_s_dnsg_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_dnsg_to_gpu(a,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_dnsg_to_gpu + end subroutine psb_s_cuda_dnsg_to_gpu end interface interface - subroutine psb_s_cp_dnsg_from_coo(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_dnsg_from_coo + end subroutine psb_s_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_s_cp_dnsg_from_fmt(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_dnsg_from_fmt + end subroutine psb_s_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_s_mv_dnsg_from_coo(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_dnsg_from_coo + end subroutine psb_s_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_s_mv_dnsg_from_fmt(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_dnsg_from_fmt + end subroutine psb_s_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_s_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta, x(:) !!$ real(psb_spk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_csmv +!!$ end subroutine psb_s_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_s_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_csmm +!!$ end subroutine psb_s_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_scal(d,a,info, side) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_s_dnsg_scal +!!$ end subroutine psb_s_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_scals(d,a,info) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_dnsg_scals +!!$ end subroutine psb_s_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function s_dnsg_get_fmt() result(res) + function s_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function s_dnsg_get_fmt + end function s_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine s_dnsg_free(a) + subroutine s_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine s_dnsg_free + end subroutine s_cuda_dnsg_free - subroutine s_dnsg_finalize(a) + subroutine s_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_s_dnsg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_dnsg_finalize + end subroutine s_cuda_dnsg_finalize #else interface - subroutine psb_s_dnsg_mold(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_dnsg_mold(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_dnsg_mold + end subroutine psb_s_cuda_dnsg_mold end interface #endif -end module psb_s_dnsg_mat_mod +end module psb_s_cuda_dnsg_mat_mod diff --git a/cuda/psb_s_elg_mat_mod.F90 b/cuda/psb_s_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_s_elg_mat_mod.F90 rename to cuda/psb_s_cuda_elg_mat_mod.F90 index 5c4eae9b..d6b003b5 100644 --- a/cuda/psb_s_elg_mat_mod.F90 +++ b/cuda/psb_s_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_s_elg_mat_mod +module psb_s_cuda_elg_mat_mod use iso_c_binding use psb_s_mat_mod use psb_s_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_ell_sparse_mat) :: psb_s_elg_sparse_mat + type, extends(psb_s_ell_sparse_mat) :: psb_s_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_s_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => s_elg_get_fmt - procedure, pass(a) :: sizeof => s_elg_sizeof - procedure, pass(a) :: vect_mv => psb_s_elg_vect_mv - procedure, pass(a) :: csmm => psb_s_elg_csmm - procedure, pass(a) :: csmv => psb_s_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_s_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_s_elg_scals - procedure, pass(a) :: scalv => psb_s_elg_scal - procedure, pass(a) :: reallocate_nz => psb_s_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_elg_allocate_mnnz - procedure, pass(a) :: reinit => s_elg_reinit + procedure, nopass :: get_fmt => s_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_s_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_cuda_elg_scals + procedure, pass(a) :: scalv => psb_s_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => s_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_elg_from_fmt - procedure, pass(a) :: free => s_elg_free - procedure, pass(a) :: mold => psb_s_elg_mold - procedure, pass(a) :: csput_a => psb_s_elg_csput_a - procedure, pass(a) :: csput_v => psb_s_elg_csput_v - procedure, pass(a) :: is_host => s_elg_is_host - procedure, pass(a) :: is_dev => s_elg_is_dev - procedure, pass(a) :: is_sync => s_elg_is_sync - procedure, pass(a) :: set_host => s_elg_set_host - procedure, pass(a) :: set_dev => s_elg_set_dev - procedure, pass(a) :: set_sync => s_elg_set_sync - procedure, pass(a) :: sync => s_elg_sync - procedure, pass(a) :: from_gpu => psb_s_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_s_elg_to_gpu - procedure, pass(a) :: asb => psb_s_elg_asb - final :: s_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => s_cuda_elg_free + procedure, pass(a) :: mold => psb_s_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_s_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_s_cuda_elg_csput_v + procedure, pass(a) :: is_host => s_cuda_elg_is_host + procedure, pass(a) :: is_dev => s_cuda_elg_is_dev + procedure, pass(a) :: is_sync => s_cuda_elg_is_sync + procedure, pass(a) :: set_host => s_cuda_elg_set_host + procedure, pass(a) :: set_dev => s_cuda_elg_set_dev + procedure, pass(a) :: set_sync => s_cuda_elg_set_sync + procedure, pass(a) :: sync => s_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_s_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_s_cuda_elg_asb + final :: s_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_s_elg_mold - procedure, pass(a) :: asb => psb_s_elg_asb + procedure, pass(a) :: mold => psb_s_cuda_elg_mold + procedure, pass(a) :: asb => psb_s_cuda_elg_asb #endif - end type psb_s_elg_sparse_mat + end type psb_s_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: s_elg_get_nzeros, s_elg_free, s_elg_get_fmt, & - & s_elg_get_size, s_elg_sizeof, s_elg_get_nz_row, s_elg_sync + private :: s_cuda_elg_get_nzeros, s_cuda_elg_free, s_cuda_elg_get_fmt, & + & s_cuda_elg_get_size, s_cuda_elg_sizeof, s_cuda_elg_get_nz_row, s_cuda_elg_sync interface - subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_vect_mv + end subroutine psb_s_cuda_elg_vect_mv end interface interface - subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_inner_vect_sv + end subroutine psb_s_cuda_elg_inner_vect_sv end interface interface - subroutine psb_s_elg_reallocate_nz(nz,a) - import :: psb_s_elg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_elg_reallocate_nz(nz,a) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_elg_reallocate_nz + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_reallocate_nz end interface interface - subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) - import :: psb_s_elg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_elg_allocate_mnnz + end subroutine psb_s_cuda_elg_allocate_mnnz end interface interface - subroutine psb_s_elg_mold(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_mold(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_mold + end subroutine psb_s_cuda_elg_mold end interface interface - subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_csput_a + end subroutine psb_s_cuda_elg_csput_a end interface interface - subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_s_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_s_base_vect_type,& + subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_s_base_vect_type,& & psb_i_base_vect_type - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_csput_v + end subroutine psb_s_cuda_elg_csput_v end interface interface - subroutine psb_s_elg_from_gpu(a,info) - import :: psb_s_elg_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_from_gpu(a,info) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_from_gpu + end subroutine psb_s_cuda_elg_from_gpu end interface interface - subroutine psb_s_elg_to_gpu(a,info, nzrm) - import :: psb_s_elg_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_elg_to_gpu + end subroutine psb_s_cuda_elg_to_gpu end interface interface - subroutine psb_s_cp_elg_from_coo(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_elg_from_coo + end subroutine psb_s_cuda_cp_elg_from_coo end interface interface - subroutine psb_s_cp_elg_from_fmt(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_elg_from_fmt + end subroutine psb_s_cuda_cp_elg_from_fmt end interface interface - subroutine psb_s_mv_elg_from_coo(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_elg_from_coo + end subroutine psb_s_cuda_mv_elg_from_coo end interface interface - subroutine psb_s_mv_elg_from_fmt(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_elg_from_fmt + end subroutine psb_s_cuda_mv_elg_from_fmt end interface interface - subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_csmv + end subroutine psb_s_cuda_elg_csmv end interface interface - subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_csmm + end subroutine psb_s_cuda_elg_csmm end interface interface - subroutine psb_s_elg_scal(d,a,info, side) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_scal(d,a,info, side) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_elg_scal + end subroutine psb_s_cuda_elg_scal end interface interface - subroutine psb_s_elg_scals(d,a,info) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_scals(d,a,info) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_scals + end subroutine psb_s_cuda_elg_scals end interface interface - subroutine psb_s_elg_asb(a) - import :: psb_s_elg_sparse_mat - class(psb_s_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_elg_asb + subroutine psb_s_cuda_elg_asb(a) + import :: psb_s_cuda_elg_sparse_mat + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function s_elg_sizeof(a) result(res) + function s_cuda_elg_sizeof(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_elg_sizeof + end function s_cuda_elg_sizeof - function s_elg_get_fmt() result(res) + function s_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function s_elg_get_fmt + end function s_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine s_elg_reinit(a,clear) + subroutine s_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine s_elg_reinit + end subroutine s_cuda_elg_reinit - subroutine s_elg_free(a) + subroutine s_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine s_elg_free + end subroutine s_cuda_elg_free - subroutine s_elg_sync(a) + subroutine s_cuda_elg_sync(a) implicit none - class(psb_s_elg_sparse_mat), target, intent(in) :: a - class(psb_s_elg_sparse_mat), pointer :: tmpa + class(psb_s_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine s_elg_sync + end subroutine s_cuda_elg_sync - subroutine s_elg_set_host(a) + subroutine s_cuda_elg_set_host(a) implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine s_elg_set_host + end subroutine s_cuda_elg_set_host - subroutine s_elg_set_dev(a) + subroutine s_cuda_elg_set_dev(a) implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine s_elg_set_dev + end subroutine s_cuda_elg_set_dev - subroutine s_elg_set_sync(a) + subroutine s_cuda_elg_set_sync(a) implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine s_elg_set_sync + end subroutine s_cuda_elg_set_sync - function s_elg_is_dev(a) result(res) + function s_cuda_elg_is_dev(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function s_elg_is_dev + end function s_cuda_elg_is_dev - function s_elg_is_host(a) result(res) + function s_cuda_elg_is_host(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function s_elg_is_host + end function s_cuda_elg_is_host - function s_elg_is_sync(a) result(res) + function s_cuda_elg_is_sync(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function s_elg_is_sync + end function s_cuda_elg_is_sync - subroutine s_elg_finalize(a) + subroutine s_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_s_elg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_elg_finalize + end subroutine s_cuda_elg_finalize #else interface - subroutine psb_s_elg_asb(a) - import :: psb_s_elg_sparse_mat - class(psb_s_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_elg_asb + subroutine psb_s_cuda_elg_asb(a) + import :: psb_s_cuda_elg_sparse_mat + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_asb end interface interface - subroutine psb_s_elg_mold(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_mold(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_mold + end subroutine psb_s_cuda_elg_mold end interface #endif -end module psb_s_elg_mat_mod +end module psb_s_cuda_elg_mat_mod diff --git a/cuda/psb_s_hdiag_mat_mod.F90 b/cuda/psb_s_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_s_hdiag_mat_mod.F90 rename to cuda/psb_s_cuda_hdiag_mat_mod.F90 index be0ef2b2..0a66ff09 100644 --- a/cuda/psb_s_hdiag_mat_mod.F90 +++ b/cuda/psb_s_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_s_hdiag_mat_mod +module psb_s_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_s_hdia_mat_mod - type, extends(psb_s_hdia_sparse_mat) :: psb_s_hdiag_sparse_mat + type, extends(psb_s_hdia_sparse_mat) :: psb_s_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => s_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => s_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_s_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_s_hdiag_csmm - procedure, pass(a) :: csmv => psb_s_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_s_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_s_hdiag_scals - ! procedure, pass(a) :: scalv => psb_s_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_s_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_s_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => s_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_s_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_s_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_s_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_s_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_s_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_s_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_hdiag_from_fmt - procedure, pass(a) :: free => s_hdiag_free - procedure, pass(a) :: mold => psb_s_hdiag_mold - procedure, pass(a) :: to_gpu => psb_s_hdiag_to_gpu - final :: s_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => s_cuda_hdiag_free + procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_hdiag_to_gpu + final :: s_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_s_hdiag_mold + procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold #endif - end type psb_s_hdiag_sparse_mat + end type psb_s_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: s_hdiag_get_nzeros, s_hdiag_free, s_hdiag_get_fmt, & - & s_hdiag_get_size, s_hdiag_sizeof, s_hdiag_get_nz_row + private :: s_cuda_hdiag_get_nzeros, s_cuda_hdiag_free, s_cuda_hdiag_get_fmt, & + & s_cuda_hdiag_get_size, s_cuda_hdiag_sizeof, s_cuda_hdiag_get_nz_row interface - subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hdiag_vect_mv + end subroutine psb_s_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_s_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type -!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_s_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_hdiag_inner_vect_sv +!!$ end subroutine psb_s_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_s_hdiag_reallocate_nz +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_s_hdiag_allocate_mnnz +!!$ end subroutine psb_s_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_s_hdiag_mold(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_mold(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hdiag_mold + end subroutine psb_s_cuda_hdiag_mold end interface interface - subroutine psb_s_hdiag_to_gpu(a,info) - import :: psb_s_hdiag_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hdiag_to_gpu(a,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hdiag_to_gpu + end subroutine psb_s_cuda_hdiag_to_gpu end interface interface - subroutine psb_s_cp_hdiag_from_coo(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hdiag_from_coo + end subroutine psb_s_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_s_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_s_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_cp_hdiag_from_fmt +!!$ end subroutine psb_s_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_s_mv_hdiag_from_coo(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hdiag_from_coo + end subroutine psb_s_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_s_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_s_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_mv_hdiag_from_fmt +!!$ end subroutine psb_s_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hdiag_csmv + end subroutine psb_s_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_s_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_hdiag_csmm +!!$ end subroutine psb_s_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_scal(d,a,info, side) -!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_s_hdiag_scal +!!$ end subroutine psb_s_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_scals(d,a,info) -!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_hdiag_scals +!!$ end subroutine psb_s_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function s_hdiag_get_fmt() result(res) + function s_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function s_hdiag_get_fmt + end function s_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine s_hdiag_free(a) + subroutine s_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine s_hdiag_free + end subroutine s_cuda_hdiag_free - subroutine s_hdiag_finalize(a) + subroutine s_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_s_hdiag_sparse_mat), intent(inout) :: a + type(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_s_hdia_sparse_mat%free() return - end subroutine s_hdiag_finalize + end subroutine s_cuda_hdiag_finalize #else interface - subroutine psb_s_hdiag_mold(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_mold(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hdiag_mold + end subroutine psb_s_cuda_hdiag_mold end interface #endif -end module psb_s_hdiag_mat_mod +end module psb_s_cuda_hdiag_mat_mod diff --git a/cuda/psb_s_hlg_mat_mod.F90 b/cuda/psb_s_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_s_hlg_mat_mod.F90 rename to cuda/psb_s_cuda_hlg_mat_mod.F90 index 8f896e4b..81b94e5d 100644 --- a/cuda/psb_s_hlg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_s_hlg_mat_mod +module psb_s_cuda_hlg_mat_mod use iso_c_binding use psb_s_mat_mod @@ -41,7 +41,7 @@ module psb_s_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_hll_sparse_mat) :: psb_s_hlg_sparse_mat + type, extends(psb_s_hll_sparse_mat) :: psb_s_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_s_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => s_hlg_get_fmt - procedure, pass(a) :: sizeof => s_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_s_hlg_vect_mv - procedure, pass(a) :: csmm => psb_s_hlg_csmm - procedure, pass(a) :: csmv => psb_s_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_s_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_s_hlg_scals - procedure, pass(a) :: scalv => psb_s_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_s_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_hlg_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_s_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_s_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_hlg_from_fmt - procedure, pass(a) :: free => s_hlg_free - procedure, pass(a) :: mold => psb_s_hlg_mold - procedure, pass(a) :: is_host => s_hlg_is_host - procedure, pass(a) :: is_dev => s_hlg_is_dev - procedure, pass(a) :: is_sync => s_hlg_is_sync - procedure, pass(a) :: set_host => s_hlg_set_host - procedure, pass(a) :: set_dev => s_hlg_set_dev - procedure, pass(a) :: set_sync => s_hlg_set_sync - procedure, pass(a) :: sync => s_hlg_sync - procedure, pass(a) :: from_gpu => psb_s_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_s_hlg_to_gpu - final :: s_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => s_cuda_hlg_free + procedure, pass(a) :: mold => psb_s_cuda_hlg_mold + procedure, pass(a) :: is_host => s_cuda_hlg_is_host + procedure, pass(a) :: is_dev => s_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => s_cuda_hlg_is_sync + procedure, pass(a) :: set_host => s_cuda_hlg_set_host + procedure, pass(a) :: set_dev => s_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => s_cuda_hlg_set_sync + procedure, pass(a) :: sync => s_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_s_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_cuda_hlg_to_gpu + final :: s_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_s_hlg_mold + procedure, pass(a) :: mold => psb_s_cuda_hlg_mold #endif - end type psb_s_hlg_sparse_mat + end type psb_s_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: s_hlg_get_nzeros, s_hlg_free, s_hlg_get_fmt, & - & s_hlg_get_size, s_hlg_sizeof, s_hlg_get_nz_row + private :: s_cuda_hlg_get_nzeros, s_cuda_hlg_free, s_cuda_hlg_get_fmt, & + & s_cuda_hlg_get_size, s_cuda_hlg_sizeof, s_cuda_hlg_get_nz_row interface - subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_vect_mv + end subroutine psb_s_cuda_hlg_vect_mv end interface interface - subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_inner_vect_sv + end subroutine psb_s_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_s_hlg_reallocate_nz(nz,a) - import :: psb_s_hlg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_s_hlg_reallocate_nz + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_hlg_reallocate_nz end interface interface - subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_s_hlg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_hlg_allocate_mnnz + end subroutine psb_s_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_s_hlg_mold(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_mold(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_mold + end subroutine psb_s_cuda_hlg_mold end interface interface - subroutine psb_s_hlg_from_gpu(a,info) - import :: psb_s_hlg_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_from_gpu(a,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_from_gpu + end subroutine psb_s_cuda_hlg_from_gpu end interface interface - subroutine psb_s_hlg_to_gpu(a,info, nzrm) - import :: psb_s_hlg_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_hlg_to_gpu + end subroutine psb_s_cuda_hlg_to_gpu end interface interface - subroutine psb_s_cp_hlg_from_coo(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hlg_from_coo + end subroutine psb_s_cuda_cp_hlg_from_coo end interface interface - subroutine psb_s_cp_hlg_from_fmt(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hlg_from_fmt + end subroutine psb_s_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_s_mv_hlg_from_coo(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hlg_from_coo + end subroutine psb_s_cuda_mv_hlg_from_coo end interface interface - subroutine psb_s_mv_hlg_from_fmt(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hlg_from_fmt + end subroutine psb_s_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_csmv + end subroutine psb_s_cuda_hlg_csmv end interface interface - subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_csmm + end subroutine psb_s_cuda_hlg_csmm end interface interface - subroutine psb_s_hlg_scal(d,a,info, side) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_scal(d,a,info, side) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_hlg_scal + end subroutine psb_s_cuda_hlg_scal end interface interface - subroutine psb_s_hlg_scals(d,a,info) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_scals(d,a,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_scals + end subroutine psb_s_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function s_hlg_sizeof(a) result(res) + function s_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_hlg_sizeof + end function s_cuda_hlg_sizeof - function s_hlg_get_fmt() result(res) + function s_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function s_hlg_get_fmt + end function s_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine s_hlg_free(a) + subroutine s_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine s_hlg_free + end subroutine s_cuda_hlg_free - subroutine s_hlg_sync(a) + subroutine s_cuda_hlg_sync(a) implicit none - class(psb_s_hlg_sparse_mat), target, intent(in) :: a - class(psb_s_hlg_sparse_mat), pointer :: tmpa + class(psb_s_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine s_hlg_sync + end subroutine s_cuda_hlg_sync - subroutine s_hlg_set_host(a) + subroutine s_cuda_hlg_set_host(a) implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine s_hlg_set_host + end subroutine s_cuda_hlg_set_host - subroutine s_hlg_set_dev(a) + subroutine s_cuda_hlg_set_dev(a) implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine s_hlg_set_dev + end subroutine s_cuda_hlg_set_dev - subroutine s_hlg_set_sync(a) + subroutine s_cuda_hlg_set_sync(a) implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine s_hlg_set_sync + end subroutine s_cuda_hlg_set_sync - function s_hlg_is_dev(a) result(res) + function s_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function s_hlg_is_dev + end function s_cuda_hlg_is_dev - function s_hlg_is_host(a) result(res) + function s_cuda_hlg_is_host(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function s_hlg_is_host + end function s_cuda_hlg_is_host - function s_hlg_is_sync(a) result(res) + function s_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function s_hlg_is_sync + end function s_cuda_hlg_is_sync - subroutine s_hlg_finalize(a) + subroutine s_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_s_hlg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_hlg_finalize + end subroutine s_cuda_hlg_finalize #else interface - subroutine psb_s_hlg_mold(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_mold(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_mold + end subroutine psb_s_cuda_hlg_mold end interface #endif -end module psb_s_hlg_mat_mod +end module psb_s_cuda_hlg_mat_mod diff --git a/cuda/psb_s_hybg_mat_mod.F90 b/cuda/psb_s_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_s_hybg_mat_mod.F90 rename to cuda/psb_s_cuda_hybg_mat_mod.F90 index 5a8e0e5d..ae76aac1 100644 --- a/cuda/psb_s_hybg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_s_hybg_mat_mod +module psb_s_cuda_hybg_mat_mod use iso_c_binding use psb_s_mat_mod use cusparse_mod - type, extends(psb_s_csr_sparse_mat) :: psb_s_hybg_sparse_mat + type, extends(psb_s_csr_sparse_mat) :: psb_s_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_s_hybg_mat_mod type(s_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => s_hybg_get_fmt - procedure, pass(a) :: sizeof => s_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_s_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_s_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_hybg_csmm - procedure, pass(a) :: csmv => psb_s_hybg_csmv - procedure, pass(a) :: scals => psb_s_hybg_scals - procedure, pass(a) :: scalv => psb_s_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_s_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_hybg_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_s_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_s_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_hybg_from_fmt - procedure, pass(a) :: free => s_hybg_free - procedure, pass(a) :: mold => psb_s_hybg_mold - procedure, pass(a) :: to_gpu => psb_s_hybg_to_gpu - final :: s_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => s_cuda_hybg_free + procedure, pass(a) :: mold => psb_s_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_hybg_to_gpu + final :: s_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_s_hybg_mold + procedure, pass(a) :: mold => psb_s_cuda_hybg_mold #endif - end type psb_s_hybg_sparse_mat + end type psb_s_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: s_hybg_get_nzeros, s_hybg_free, s_hybg_get_fmt, & - & s_hybg_get_size, s_hybg_sizeof, s_hybg_get_nz_row + private :: s_cuda_hybg_get_nzeros, s_cuda_hybg_free, s_cuda_hybg_get_fmt, & + & s_cuda_hybg_get_size, s_cuda_hybg_sizeof, s_cuda_hybg_get_nz_row interface - subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_inner_vect_sv + end subroutine psb_s_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_vect_mv + end subroutine psb_s_cuda_hybg_vect_mv end interface interface - subroutine psb_s_hybg_reallocate_nz(nz,a) - import :: psb_s_hybg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_s_hybg_reallocate_nz + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_hybg_reallocate_nz end interface interface - subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_s_hybg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_hybg_allocate_mnnz + end subroutine psb_s_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_s_hybg_mold(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_mold(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hybg_mold + end subroutine psb_s_cuda_hybg_mold end interface interface - subroutine psb_s_hybg_to_gpu(a,info, nzrm) - import :: psb_s_hybg_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_hybg_to_gpu + end subroutine psb_s_cuda_hybg_to_gpu end interface interface - subroutine psb_s_cp_hybg_from_coo(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hybg_from_coo + end subroutine psb_s_cuda_cp_hybg_from_coo end interface interface - subroutine psb_s_cp_hybg_from_fmt(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hybg_from_fmt + end subroutine psb_s_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_s_mv_hybg_from_coo(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hybg_from_coo + end subroutine psb_s_cuda_mv_hybg_from_coo end interface interface - subroutine psb_s_mv_hybg_from_fmt(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hybg_from_fmt + end subroutine psb_s_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_csmv + end subroutine psb_s_cuda_hybg_csmv end interface interface - subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_csmm + end subroutine psb_s_cuda_hybg_csmm end interface interface - subroutine psb_s_hybg_scal(d,a,info,side) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hybg_scal(d,a,info,side) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_hybg_scal + end subroutine psb_s_cuda_hybg_scal end interface interface - subroutine psb_s_hybg_scals(d,a,info) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hybg_scals(d,a,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hybg_scals + end subroutine psb_s_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function s_hybg_sizeof(a) result(res) + function s_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + psb_sizeof_sp * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_hybg_sizeof + end function s_cuda_hybg_sizeof - function s_hybg_get_fmt() result(res) + function s_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function s_hybg_get_fmt + end function s_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine s_hybg_free(a) + subroutine s_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_s_csr_sparse_mat%free() return - end subroutine s_hybg_free + end subroutine s_cuda_hybg_free - subroutine s_hybg_finalize(a) + subroutine s_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_s_hybg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine s_hybg_finalize + end subroutine s_cuda_hybg_finalize #else interface - subroutine psb_s_hybg_mold(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_mold(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hybg_mold + end subroutine psb_s_cuda_hybg_mold end interface #endif -end module psb_s_hybg_mat_mod +end module psb_s_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_s_gpu_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_s_gpu_vect_mod.F90 rename to cuda/psb_s_cuda_vect_mod.F90 index 1371db53..e19c980a 100644 --- a/cuda/psb_s_gpu_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_s_gpu_vect_mod +module psb_s_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_s_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_s_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_s_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_base_vect_type) :: psb_s_vect_gpu + type, extends(psb_s_base_vect_type) :: psb_s_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_s_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => s_gpu_get_nrows - procedure, nopass :: get_fmt => s_gpu_get_fmt - - procedure, pass(x) :: all => s_gpu_all - procedure, pass(x) :: zero => s_gpu_zero - procedure, pass(x) :: asb_m => s_gpu_asb_m - procedure, pass(x) :: sync => s_gpu_sync - procedure, pass(x) :: sync_space => s_gpu_sync_space - procedure, pass(x) :: bld_x => s_gpu_bld_x - procedure, pass(x) :: bld_mn => s_gpu_bld_mn - procedure, pass(x) :: free => s_gpu_free - procedure, pass(x) :: ins_a => s_gpu_ins_a - procedure, pass(x) :: ins_v => s_gpu_ins_v - procedure, pass(x) :: is_host => s_gpu_is_host - procedure, pass(x) :: is_dev => s_gpu_is_dev - procedure, pass(x) :: is_sync => s_gpu_is_sync - procedure, pass(x) :: set_host => s_gpu_set_host - procedure, pass(x) :: set_dev => s_gpu_set_dev - procedure, pass(x) :: set_sync => s_gpu_set_sync - procedure, pass(x) :: set_scal => s_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => s_gpu_set_vect - procedure, pass(x) :: gthzv_x => s_gpu_gthzv_x - procedure, pass(y) :: sctb => s_gpu_sctb - procedure, pass(y) :: sctb_x => s_gpu_sctb_x - procedure, pass(x) :: gthzbuf => s_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => s_gpu_sctb_buf - procedure, pass(x) :: new_buffer => s_gpu_new_buffer - procedure, nopass :: device_wait => s_gpu_device_wait - procedure, pass(x) :: free_buffer => s_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => s_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => s_gpu_dot_v - procedure, pass(x) :: dot_a => s_gpu_dot_a - procedure, pass(y) :: axpby_v => s_gpu_axpby_v - procedure, pass(y) :: axpby_a => s_gpu_axpby_a - procedure, pass(y) :: mlt_v => s_gpu_mlt_v - procedure, pass(y) :: mlt_a => s_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => s_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => s_gpu_mlt_v_2 - procedure, pass(x) :: scal => s_gpu_scal - procedure, pass(x) :: nrm2 => s_gpu_nrm2 - procedure, pass(x) :: amax => s_gpu_amax - procedure, pass(x) :: asum => s_gpu_asum - procedure, pass(x) :: absval1 => s_gpu_absval1 - procedure, pass(x) :: absval2 => s_gpu_absval2 - - final :: s_gpu_vect_finalize + procedure, pass(x) :: get_nrows => s_cuda_get_nrows + procedure, nopass :: get_fmt => s_cuda_get_fmt + + procedure, pass(x) :: all => s_cuda_all + procedure, pass(x) :: zero => s_cuda_zero + procedure, pass(x) :: asb_m => s_cuda_asb_m + procedure, pass(x) :: sync => s_cuda_sync + procedure, pass(x) :: sync_space => s_cuda_sync_space + procedure, pass(x) :: bld_x => s_cuda_bld_x + procedure, pass(x) :: bld_mn => s_cuda_bld_mn + procedure, pass(x) :: free => s_cuda_free + procedure, pass(x) :: ins_a => s_cuda_ins_a + procedure, pass(x) :: ins_v => s_cuda_ins_v + procedure, pass(x) :: is_host => s_cuda_is_host + procedure, pass(x) :: is_dev => s_cuda_is_dev + procedure, pass(x) :: is_sync => s_cuda_is_sync + procedure, pass(x) :: set_host => s_cuda_set_host + procedure, pass(x) :: set_dev => s_cuda_set_dev + procedure, pass(x) :: set_sync => s_cuda_set_sync + procedure, pass(x) :: set_scal => s_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => s_cuda_set_vect + procedure, pass(x) :: gthzv_x => s_cuda_gthzv_x + procedure, pass(y) :: sctb => s_cuda_sctb + procedure, pass(y) :: sctb_x => s_cuda_sctb_x + procedure, pass(x) :: gthzbuf => s_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => s_cuda_sctb_buf + procedure, pass(x) :: new_buffer => s_cuda_new_buffer + procedure, nopass :: device_wait => s_cuda_device_wait + procedure, pass(x) :: free_buffer => s_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => s_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => s_cuda_dot_v + procedure, pass(x) :: dot_a => s_cuda_dot_a + procedure, pass(y) :: axpby_v => s_cuda_axpby_v + procedure, pass(y) :: axpby_a => s_cuda_axpby_a + procedure, pass(y) :: mlt_v => s_cuda_mlt_v + procedure, pass(y) :: mlt_a => s_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => s_cuda_mlt_v_2 + procedure, pass(x) :: scal => s_cuda_scal + procedure, pass(x) :: nrm2 => s_cuda_nrm2 + procedure, pass(x) :: amax => s_cuda_amax + procedure, pass(x) :: asum => s_cuda_asum + procedure, pass(x) :: absval1 => s_cuda_absval1 + procedure, pass(x) :: absval2 => s_cuda_absval2 + + final :: s_cuda_vect_finalize #endif - end type psb_s_vect_gpu + end type psb_s_vect_cuda - public :: psb_s_vect_gpu_ + public :: psb_s_vect_cuda_ private :: constructor - interface psb_s_vect_gpu_ + interface psb_s_vect_cuda_ module procedure constructor - end interface psb_s_vect_gpu_ + end interface psb_s_vect_cuda_ contains function constructor(x) result(this) real(psb_spk_) :: x(:) - type(psb_s_vect_gpu) :: this + type(psb_s_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine s_gpu_device_wait() + subroutine s_cuda_device_wait() call psb_cudaSync() - end subroutine s_gpu_device_wait + end subroutine s_cuda_device_wait - subroutine s_gpu_new_buffer(n,x,info) + subroutine s_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine s_gpu_gthzv_x + end subroutine s_cuda_gthzv_x - subroutine s_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine s_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_s_vect_gpu) :: x + class(psb_s_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine s_gpu_gthzbuf + end subroutine s_cuda_gthzbuf - subroutine s_gpu_sctb(n,idx,x,beta,y) + subroutine s_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: beta, x(:) - class(psb_s_vect_gpu) :: y + class(psb_s_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_s_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine s_gpu_sctb + end subroutine s_cuda_sctb - subroutine s_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine s_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta, x(:) - class(psb_s_vect_gpu) :: y + class(psb_s_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine s_gpu_sctb_x + end subroutine s_cuda_sctb_x - subroutine s_gpu_sctb_buf(i,n,idx,beta,y) + subroutine s_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta - class(psb_s_vect_gpu) :: y + class(psb_s_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine s_gpu_sctb_buf + end subroutine s_cuda_sctb_buf - subroutine s_gpu_bld_x(x,this) + subroutine s_cuda_bld_x(x,this) use psb_base_mod real(psb_spk_), intent(in) :: this(:) - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_bld_x',& + call psb_errpush(info,'s_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine s_gpu_bld_x + end subroutine s_cuda_bld_x - subroutine s_gpu_bld_mn(x,n) + subroutine s_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'s_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'s_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine s_gpu_bld_mn + end subroutine s_cuda_bld_mn - subroutine s_gpu_set_host(x) + subroutine s_cuda_set_host(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine s_gpu_set_host + end subroutine s_cuda_set_host - subroutine s_gpu_set_dev(x) + subroutine s_cuda_set_dev(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine s_gpu_set_dev + end subroutine s_cuda_set_dev - subroutine s_gpu_set_sync(x) + subroutine s_cuda_set_sync(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine s_gpu_set_sync + end subroutine s_cuda_set_sync - function s_gpu_is_dev(x) result(res) + function s_cuda_is_dev(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function s_gpu_is_dev + end function s_cuda_is_dev - function s_gpu_is_host(x) result(res) + function s_cuda_is_host(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function s_gpu_is_host + end function s_cuda_is_host - function s_gpu_is_sync(x) result(res) + function s_cuda_is_sync(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function s_gpu_is_sync + end function s_cuda_is_sync - function s_gpu_get_nrows(x) result(res) + function s_cuda_get_nrows(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function s_gpu_get_nrows + end function s_cuda_get_nrows - function s_gpu_get_fmt() result(res) + function s_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'sGPU' - end function s_gpu_get_fmt + end function s_cuda_get_fmt - subroutine s_gpu_all(n, x, info) + subroutine s_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_s_vect_gpu), intent(out) :: x + class(psb_s_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_all',& + call psb_errpush(info,'s_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine s_gpu_all + end subroutine s_cuda_all - subroutine s_gpu_zero(x) + subroutine s_cuda_zero(x) use psi_serial_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=szero call x%set_host() - end subroutine s_gpu_zero + end subroutine s_cuda_zero - subroutine s_gpu_asb_m(n, x, info) + subroutine s_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine s_gpu_asb_m + end subroutine s_cuda_asb_m - subroutine s_gpu_sync_space(x,info) + subroutine s_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine s_gpu_sync_space + end subroutine s_cuda_sync_space - subroutine s_gpu_sync(x) + subroutine s_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'s_gpu_sync') + call psb_errpush(info,'s_cuda_sync') end if - end subroutine s_gpu_sync + end subroutine s_cuda_sync - subroutine s_gpu_free(x, info) + subroutine s_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine s_gpu_free + end subroutine s_cuda_free - subroutine s_gpu_set_scal(x,val,first,last) - class(psb_s_vect_gpu), intent(inout) :: x + subroutine s_cuda_set_scal(x,val,first,last) + class(psb_s_vect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine s_gpu_set_scal + end subroutine s_cuda_set_scal !!$ -!!$ subroutine s_gpu_set_vect(x,val) -!!$ class(psb_s_vect_gpu), intent(inout) :: x +!!$ subroutine s_cuda_set_vect(x,val) +!!$ class(psb_s_vect_cuda), intent(inout) :: x !!$ real(psb_spk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_s_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine s_gpu_set_vect +!!$ end subroutine s_cuda_set_vect - function s_gpu_dot_v(n,x,y) result(res) + function s_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_s_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'s_gpu_dot_v') + call psb_errpush(info,'s_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function s_gpu_dot_v + end function s_cuda_dot_v - function s_gpu_dot_a(n,x,y) result(res) + function s_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function s_gpu_dot_a + end function s_cuda_dot_a - subroutine s_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine s_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y real(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) ! Do something different here if ((beta /= szero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine s_gpu_axpby_v + end subroutine s_cuda_axpby_v - subroutine s_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine s_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m real(psb_spk_), intent(in) :: x(:) - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y real(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine s_gpu_axpby_a + end subroutine s_cuda_axpby_a - subroutine s_gpu_mlt_v(x, y, info) + subroutine s_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine s_gpu_mlt_v + end subroutine s_cuda_mlt_v - subroutine s_gpu_mlt_a(x, y, info) + subroutine s_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none real(psb_spk_), intent(in) :: x(:) - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_s_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine s_gpu_mlt_a + end subroutine s_cuda_mlt_a - subroutine s_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine s_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none real(psb_spk_), intent(in) :: alpha,beta real(psb_spk_), intent(in) :: x(:) real(psb_spk_), intent(in) :: y(:) - class(psb_s_vect_gpu), intent(inout) :: z + class(psb_s_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine s_gpu_mlt_a_2 + end subroutine s_cuda_mlt_a_2 - subroutine s_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine s_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none real(psb_spk_), intent(in) :: alpha,beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y - class(psb_s_vect_gpu), intent(inout) :: z + class(psb_s_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type (yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= szero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine s_gpu_mlt_v_2 + end subroutine s_cuda_mlt_v_2 - subroutine s_gpu_scal(alpha, x) + subroutine s_cuda_scal(alpha, x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x real(psb_spk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine s_gpu_scal + end subroutine s_cuda_scal - function s_gpu_nrm2(n,x) result(res) + function s_cuda_nrm2(n,x) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDevice(res,n,x%deviceVect) - end function s_gpu_nrm2 + end function s_cuda_nrm2 - function s_gpu_amax(n,x) result(res) + function s_cuda_amax(n,x) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDevice(res,n,x%deviceVect) - end function s_gpu_amax + end function s_cuda_amax - function s_gpu_asum(n,x) result(res) + function s_cuda_asum(n,x) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDevice(res,n,x%deviceVect) - end function s_gpu_asum + end function s_cuda_asum - subroutine s_gpu_absval1(x) + subroutine s_cuda_absval1(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,sone,x%deviceVect) - end subroutine s_gpu_absval1 + end subroutine s_cuda_absval1 - subroutine s_gpu_absval2(x,y) + subroutine s_cuda_absval2(x,y) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_s_vect_gpu) + class is (psb_s_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,sone,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_s_base_vect_type%absval(y) end select - end subroutine s_gpu_absval2 + end subroutine s_cuda_absval2 - subroutine s_gpu_vect_finalize(x) + subroutine s_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_s_vect_gpu), intent(inout) :: x + type(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine s_gpu_vect_finalize + end subroutine s_cuda_vect_finalize - subroutine s_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine s_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_s_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_s_vect_gpu) + class is (psb_s_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceFloat(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine s_gpu_ins_v + end subroutine s_cuda_ins_v - subroutine s_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine s_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_spk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_s_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine s_gpu_ins_a + end subroutine s_cuda_ins_a #endif -end module psb_s_gpu_vect_mod +end module psb_s_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_s_gpu_vect_mod -module psb_s_gpu_multivect_mod +module psb_s_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_s_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_s_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_s_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_base_multivect_type) :: psb_s_multivect_gpu + type, extends(psb_s_base_multivect_type) :: psb_s_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_s_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => s_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => s_gpu_multi_get_ncols - procedure, nopass :: get_fmt => s_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => s_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => s_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => s_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => s_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => s_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => s_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => s_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => s_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => s_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => s_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => s_gpu_multi_amax -!!$ procedure, pass(x) :: asum => s_gpu_multi_asum - procedure, pass(x) :: all => s_gpu_multi_all - procedure, pass(x) :: zero => s_gpu_multi_zero - procedure, pass(x) :: asb => s_gpu_multi_asb - procedure, pass(x) :: sync => s_gpu_multi_sync - procedure, pass(x) :: sync_space => s_gpu_multi_sync_space - procedure, pass(x) :: bld_x => s_gpu_multi_bld_x - procedure, pass(x) :: bld_n => s_gpu_multi_bld_n - procedure, pass(x) :: free => s_gpu_multi_free - procedure, pass(x) :: ins => s_gpu_multi_ins - procedure, pass(x) :: is_host => s_gpu_multi_is_host - procedure, pass(x) :: is_dev => s_gpu_multi_is_dev - procedure, pass(x) :: is_sync => s_gpu_multi_is_sync - procedure, pass(x) :: set_host => s_gpu_multi_set_host - procedure, pass(x) :: set_dev => s_gpu_multi_set_dev - procedure, pass(x) :: set_sync => s_gpu_multi_set_sync - procedure, pass(x) :: set_scal => s_gpu_multi_set_scal - procedure, pass(x) :: set_vect => s_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => s_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => s_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => s_gpu_multi_sctb_x - final :: s_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => s_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => s_cuda_multi_get_ncols + procedure, nopass :: get_fmt => s_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => s_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => s_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => s_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => s_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => s_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => s_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => s_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => s_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => s_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => s_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => s_cuda_multi_amax +!!$ procedure, pass(x) :: asum => s_cuda_multi_asum + procedure, pass(x) :: all => s_cuda_multi_all + procedure, pass(x) :: zero => s_cuda_multi_zero + procedure, pass(x) :: asb => s_cuda_multi_asb + procedure, pass(x) :: sync => s_cuda_multi_sync + procedure, pass(x) :: sync_space => s_cuda_multi_sync_space + procedure, pass(x) :: bld_x => s_cuda_multi_bld_x + procedure, pass(x) :: bld_n => s_cuda_multi_bld_n + procedure, pass(x) :: free => s_cuda_multi_free + procedure, pass(x) :: ins => s_cuda_multi_ins + procedure, pass(x) :: is_host => s_cuda_multi_is_host + procedure, pass(x) :: is_dev => s_cuda_multi_is_dev + procedure, pass(x) :: is_sync => s_cuda_multi_is_sync + procedure, pass(x) :: set_host => s_cuda_multi_set_host + procedure, pass(x) :: set_dev => s_cuda_multi_set_dev + procedure, pass(x) :: set_sync => s_cuda_multi_set_sync + procedure, pass(x) :: set_scal => s_cuda_multi_set_scal + procedure, pass(x) :: set_vect => s_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => s_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => s_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => s_cuda_multi_sctb_x + final :: s_cuda_multi_vect_finalize #endif - end type psb_s_multivect_gpu + end type psb_s_multivect_cuda - public :: psb_s_multivect_gpu + public :: psb_s_multivect_cuda private :: constructor - interface psb_s_multivect_gpu + interface psb_s_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) real(psb_spk_) :: x(:,:) - type(psb_s_multivect_gpu) :: this + type(psb_s_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine s_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_spk_) :: y(:) -!!$ class(psb_s_multivect_gpu) :: x +!!$ class(psb_s_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine s_gpu_multi_gthzv_x +!!$ end subroutine s_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine s_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine s_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ real(psb_spk_) :: beta, x(:) -!!$ class(psb_s_multivect_gpu) :: y +!!$ class(psb_s_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_s_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine s_gpu_multi_sctb +!!$ end subroutine s_cuda_multi_sctb !!$ -!!$ subroutine s_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine s_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_spk_) :: beta, x(:) -!!$ class(psb_s_multivect_gpu) :: y +!!$ class(psb_s_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine s_gpu_multi_sctb_x +!!$ end subroutine s_cuda_multi_sctb_x - subroutine s_gpu_multi_bld_x(x,this) + subroutine s_cuda_multi_bld_x(x,this) use psb_base_mod real(psb_spk_), intent(in) :: this(:,:) - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_multi_bld_x',& + call psb_errpush(info,'s_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine s_gpu_multi_bld_x + end subroutine s_cuda_multi_bld_x - subroutine s_gpu_multi_bld_n(x,m,n) + subroutine s_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'s_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'s_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine s_gpu_multi_bld_n + end subroutine s_cuda_multi_bld_n - subroutine s_gpu_multi_set_host(x) + subroutine s_cuda_multi_set_host(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine s_gpu_multi_set_host + end subroutine s_cuda_multi_set_host - subroutine s_gpu_multi_set_dev(x) + subroutine s_cuda_multi_set_dev(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine s_gpu_multi_set_dev + end subroutine s_cuda_multi_set_dev - subroutine s_gpu_multi_set_sync(x) + subroutine s_cuda_multi_set_sync(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine s_gpu_multi_set_sync + end subroutine s_cuda_multi_set_sync - function s_gpu_multi_is_dev(x) result(res) + function s_cuda_multi_is_dev(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function s_gpu_multi_is_dev + end function s_cuda_multi_is_dev - function s_gpu_multi_is_host(x) result(res) + function s_cuda_multi_is_host(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function s_gpu_multi_is_host + end function s_cuda_multi_is_host - function s_gpu_multi_is_sync(x) result(res) + function s_cuda_multi_is_sync(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function s_gpu_multi_is_sync + end function s_cuda_multi_is_sync - function s_gpu_multi_get_nrows(x) result(res) + function s_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function s_gpu_multi_get_nrows + end function s_cuda_multi_get_nrows - function s_gpu_multi_get_ncols(x) result(res) + function s_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function s_gpu_multi_get_ncols + end function s_cuda_multi_get_ncols - function s_gpu_multi_get_fmt() result(res) + function s_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'sGPU' - end function s_gpu_multi_get_fmt + end function s_cuda_multi_get_fmt -!!$ function s_gpu_multi_dot_v(n,x,y) result(res) +!!$ function s_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ class(psb_s_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_s_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'s_gpu_multi_dot_v') +!!$ call psb_errpush(info,'s_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function s_gpu_multi_dot_v +!!$ end function s_cuda_multi_dot_v !!$ -!!$ function s_gpu_multi_dot_a(n,x,y) result(res) +!!$ function s_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ real(psb_spk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function s_gpu_multi_dot_a +!!$ end function s_cuda_multi_dot_a !!$ -!!$ subroutine s_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine s_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_s_base_multivect_type), intent(inout) :: x -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ real(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine s_gpu_multi_axpby_v +!!$ end subroutine s_cuda_multi_axpby_v !!$ -!!$ subroutine s_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine s_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ real(psb_spk_), intent(in) :: x(:) -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ real(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine s_gpu_multi_axpby_a +!!$ end subroutine s_cuda_multi_axpby_a !!$ -!!$ subroutine s_gpu_multi_mlt_v(x, y, info) +!!$ subroutine s_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_s_base_multivect_type), intent(inout) :: x -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine s_gpu_multi_mlt_v +!!$ end subroutine s_cuda_multi_mlt_v !!$ -!!$ subroutine s_gpu_multi_mlt_a(x, y, info) +!!$ subroutine s_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: x(:) -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_s_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine s_gpu_multi_mlt_a +!!$ end subroutine s_cuda_multi_mlt_a !!$ -!!$ subroutine s_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine s_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta !!$ real(psb_spk_), intent(in) :: x(:) !!$ real(psb_spk_), intent(in) :: y(:) -!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ class(psb_s_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine s_gpu_multi_mlt_a_2 +!!$ end subroutine s_cuda_multi_mlt_a_2 !!$ -!!$ subroutine s_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine s_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta !!$ class(psb_s_base_multivect_type), intent(inout) :: x !!$ class(psb_s_base_multivect_type), intent(inout) :: y -!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ class(psb_s_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine s_gpu_multi_mlt_v_2 +!!$ end subroutine s_cuda_multi_mlt_v_2 - subroutine s_gpu_multi_set_scal(x,val) - class(psb_s_multivect_gpu), intent(inout) :: x + subroutine s_cuda_multi_set_scal(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_s_base_multivect_type%set_scal(val) call x%set_host() - end subroutine s_gpu_multi_set_scal + end subroutine s_cuda_multi_set_scal - subroutine s_gpu_multi_set_vect(x,val) - class(psb_s_multivect_gpu), intent(inout) :: x + subroutine s_cuda_multi_set_vect(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_s_base_multivect_type%set_vect(val) call x%set_host() - end subroutine s_gpu_multi_set_vect + end subroutine s_cuda_multi_set_vect -!!$ subroutine s_gpu_multi_scal(alpha, x) +!!$ subroutine s_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ real(psb_spk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_s_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine s_gpu_multi_scal +!!$ end subroutine s_cuda_multi_scal !!$ !!$ -!!$ function s_gpu_multi_nrm2(n,x) result(res) +!!$ function s_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function s_gpu_multi_nrm2 +!!$ end function s_cuda_multi_nrm2 !!$ -!!$ function s_gpu_multi_amax(n,x) result(res) +!!$ function s_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function s_gpu_multi_amax +!!$ end function s_cuda_multi_amax !!$ -!!$ function s_gpu_multi_asum(n,x) result(res) +!!$ function s_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function s_gpu_multi_asum +!!$ end function s_cuda_multi_asum - subroutine s_gpu_multi_all(m,n, x, info) + subroutine s_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_multivect_gpu), intent(out) :: x + class(psb_s_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=szero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_multi_all',& + call psb_errpush(info,'s_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine s_gpu_multi_all + end subroutine s_cuda_multi_all - subroutine s_gpu_multi_zero(x) + subroutine s_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine s_gpu_multi_zero + end subroutine s_cuda_multi_zero - subroutine s_gpu_multi_asb(m,n, x, info) + subroutine s_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine s_gpu_multi_asb + end subroutine s_cuda_multi_asb - subroutine s_gpu_multi_sync_space(x,info) + subroutine s_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine s_gpu_multi_sync_space + end subroutine s_cuda_multi_sync_space - subroutine s_gpu_multi_sync(x) + subroutine s_cuda_multi_sync(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'s_gpu_multi_sync') + call psb_errpush(info,'s_cuda_multi_sync') end if - end subroutine s_gpu_multi_sync + end subroutine s_cuda_multi_sync - subroutine s_gpu_multi_free(x, info) + subroutine s_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine s_gpu_multi_free + end subroutine s_cuda_multi_free - subroutine s_gpu_multi_vect_finalize(x) + subroutine s_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_s_multivect_gpu), intent(inout) :: x + type(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine s_gpu_multi_vect_finalize + end subroutine s_cuda_multi_vect_finalize - subroutine s_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine s_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_spk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_s_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine s_gpu_multi_ins + end subroutine s_cuda_multi_ins #endif -end module psb_s_gpu_multivect_mod +end module psb_s_cuda_multivect_mod diff --git a/cuda/psb_z_csrg_mat_mod.F90 b/cuda/psb_z_csrg_mat_mod.F90 deleted file mode 100644 index 14df1124..00000000 --- a/cuda/psb_z_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_z_csrg_mat_mod - - use iso_c_binding - use psb_z_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_z_csr_sparse_mat) :: psb_z_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(z_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => z_csrg_get_fmt - procedure, pass(a) :: sizeof => z_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_z_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_z_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_csrg_csmm - procedure, pass(a) :: csmv => psb_z_csrg_csmv - procedure, pass(a) :: scals => psb_z_csrg_scals - procedure, pass(a) :: scalv => psb_z_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_z_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_csrg_from_fmt - procedure, pass(a) :: free => z_csrg_free - procedure, pass(a) :: mold => psb_z_csrg_mold - procedure, pass(a) :: is_host => z_csrg_is_host - procedure, pass(a) :: is_dev => z_csrg_is_dev - procedure, pass(a) :: is_sync => z_csrg_is_sync - procedure, pass(a) :: set_host => z_csrg_set_host - procedure, pass(a) :: set_dev => z_csrg_set_dev - procedure, pass(a) :: set_sync => z_csrg_set_sync - procedure, pass(a) :: sync => z_csrg_sync - procedure, pass(a) :: to_gpu => psb_z_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_z_csrg_from_gpu - final :: z_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_csrg_mold -#endif - end type psb_z_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: z_csrg_get_nzeros, z_csrg_free, z_csrg_get_fmt, & - & z_csrg_get_size, z_csrg_sizeof, z_csrg_get_nz_row - - - interface - subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_vect_mv - end interface - - interface - subroutine psb_z_csrg_reallocate_nz(nz,a) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_z_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_z_csrg_reallocate_nz - end interface - - interface - subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_z_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_csrg_allocate_mnnz - end interface - - interface - subroutine psb_z_csrg_mold(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_mold - end interface - - interface - subroutine psb_z_csrg_to_gpu(a,info, nzrm) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_csrg_to_gpu - end interface - - interface - subroutine psb_z_csrg_from_gpu(a,info) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_from_gpu - end interface - - interface - subroutine psb_z_cp_csrg_from_coo(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_csrg_from_coo - end interface - - interface - subroutine psb_z_cp_csrg_from_fmt(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_csrg_from_fmt - end interface - - interface - subroutine psb_z_mv_csrg_from_coo(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_csrg_from_coo - end interface - - interface - subroutine psb_z_mv_csrg_from_fmt(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_csrg_from_fmt - end interface - - interface - subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:) - complex(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_csmv - end interface - interface - subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_csmm - end interface - - interface - subroutine psb_z_csrg_scal(d,a,info,side) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - complex(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_z_csrg_scal - end interface - - interface - subroutine psb_z_csrg_scals(d,a,info) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - complex(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function z_csrg_sizeof(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + (2*psb_sizeof_dp) * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function z_csrg_sizeof - - function z_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function z_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine z_csrg_set_host(a) - implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine z_csrg_set_host - - subroutine z_csrg_set_dev(a) - implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine z_csrg_set_dev - - subroutine z_csrg_set_sync(a) - implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine z_csrg_set_sync - - function z_csrg_is_dev(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function z_csrg_is_dev - - function z_csrg_is_host(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function z_csrg_is_host - - function z_csrg_is_sync(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function z_csrg_is_sync - - - subroutine z_csrg_sync(a) - implicit none - class(psb_z_csrg_sparse_mat), target, intent(in) :: a - class(psb_z_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine z_csrg_sync - - subroutine z_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_z_csr_sparse_mat%free() - - return - - end subroutine z_csrg_free - - subroutine z_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_z_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine z_csrg_finalize - -#else - interface - subroutine psb_z_csrg_mold(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_mold - end interface - -#endif - -end module psb_z_csrg_mat_mod diff --git a/cuda/psb_z_cuda_csrg_mat_mod.F90 b/cuda/psb_z_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..75170185 --- /dev/null +++ b/cuda/psb_z_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_csrg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_csr_sparse_mat) :: psb_z_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(z_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_z_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_z_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => z_cuda_csrg_free + procedure, pass(a) :: mold => psb_z_cuda_csrg_mold + procedure, pass(a) :: is_host => z_cuda_csrg_is_host + procedure, pass(a) :: is_dev => z_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => z_cuda_csrg_is_sync + procedure, pass(a) :: set_host => z_cuda_csrg_set_host + procedure, pass(a) :: set_dev => z_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => z_cuda_csrg_set_sync + procedure, pass(a) :: sync => z_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_z_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_z_cuda_csrg_from_gpu + final :: z_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_cuda_csrg_mold +#endif + end type psb_z_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_cuda_csrg_get_nzeros, z_cuda_csrg_free, z_cuda_csrg_get_fmt, & + & z_cuda_csrg_get_size, z_cuda_csrg_sizeof, z_cuda_csrg_get_nz_row + + + interface + subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_csrg_mold(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_mold + end interface + + interface + subroutine psb_z_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_z_cuda_csrg_from_gpu(a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmv + end interface + interface + subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmm + end interface + + interface + subroutine psb_z_cuda_csrg_scal(d,a,info,side) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_csrg_scal + end interface + + interface + subroutine psb_z_cuda_csrg_scals(d,a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_csrg_sizeof + + function z_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function z_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine z_cuda_csrg_set_host(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_cuda_csrg_set_host + + subroutine z_cuda_csrg_set_dev(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_cuda_csrg_set_dev + + subroutine z_cuda_csrg_set_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_cuda_csrg_set_sync + + function z_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_cuda_csrg_is_dev + + function z_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_cuda_csrg_is_host + + function z_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_cuda_csrg_is_sync + + + subroutine z_cuda_csrg_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_cuda_csrg_sync + + subroutine z_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_cuda_csrg_free + + subroutine z_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine z_cuda_csrg_finalize + +#else + interface + subroutine psb_z_cuda_csrg_mold(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_mold + end interface + +#endif + +end module psb_z_cuda_csrg_mat_mod diff --git a/cuda/psb_z_diag_mat_mod.F90 b/cuda/psb_z_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_z_diag_mat_mod.F90 rename to cuda/psb_z_cuda_diag_mat_mod.F90 index 986d75d9..80906778 100644 --- a/cuda/psb_z_diag_mat_mod.F90 +++ b/cuda/psb_z_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_z_diag_mat_mod +module psb_z_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_z_dia_mat_mod - type, extends(psb_z_dia_sparse_mat) :: psb_z_diag_sparse_mat + type, extends(psb_z_dia_sparse_mat) :: psb_z_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_z_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => z_diag_get_fmt - procedure, pass(a) :: sizeof => z_diag_sizeof - procedure, pass(a) :: vect_mv => psb_z_diag_vect_mv -! procedure, pass(a) :: csmm => psb_z_diag_csmm - procedure, pass(a) :: csmv => psb_z_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_z_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_z_diag_scals -! procedure, pass(a) :: scalv => psb_z_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_z_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_z_diag_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => z_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_z_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_z_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_z_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_z_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_z_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_z_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_z_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_z_mv_diag_from_fmt - procedure, pass(a) :: free => z_diag_free - procedure, pass(a) :: mold => psb_z_diag_mold - procedure, pass(a) :: to_gpu => psb_z_diag_to_gpu - final :: z_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => z_cuda_diag_free + procedure, pass(a) :: mold => psb_z_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_diag_to_gpu + final :: z_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_z_diag_mold + procedure, pass(a) :: mold => psb_z_cuda_diag_mold #endif - end type psb_z_diag_sparse_mat + end type psb_z_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: z_diag_get_nzeros, z_diag_free, z_diag_get_fmt, & - & z_diag_get_size, z_diag_sizeof, z_diag_get_nz_row + private :: z_cuda_diag_get_nzeros, z_cuda_diag_free, z_cuda_diag_get_fmt, & + & z_cuda_diag_get_size, z_cuda_diag_sizeof, z_cuda_diag_get_nz_row interface - subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_vect_mv + end subroutine psb_z_cuda_diag_vect_mv end interface interface - subroutine psb_z_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_inner_vect_sv + end subroutine psb_z_cuda_diag_inner_vect_sv end interface interface - subroutine psb_z_diag_reallocate_nz(nz,a) - import :: psb_z_diag_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_diag_reallocate_nz(nz,a) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_diag_sparse_mat), intent(inout) :: a - end subroutine psb_z_diag_reallocate_nz + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_diag_reallocate_nz end interface interface - subroutine psb_z_diag_allocate_mnnz(m,n,a,nz) - import :: psb_z_diag_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_diag_allocate_mnnz + end subroutine psb_z_cuda_diag_allocate_mnnz end interface interface - subroutine psb_z_diag_mold(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_mold(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_diag_mold + end subroutine psb_z_cuda_diag_mold end interface interface - subroutine psb_z_diag_to_gpu(a,info, nzrm) - import :: psb_z_diag_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_diag_to_gpu + end subroutine psb_z_cuda_diag_to_gpu end interface interface - subroutine psb_z_cp_diag_from_coo(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_diag_from_coo + end subroutine psb_z_cuda_cp_diag_from_coo end interface interface - subroutine psb_z_cp_diag_from_fmt(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_diag_from_fmt + end subroutine psb_z_cuda_cp_diag_from_fmt end interface interface - subroutine psb_z_mv_diag_from_coo(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_diag_from_coo + end subroutine psb_z_cuda_mv_diag_from_coo end interface interface - subroutine psb_z_mv_diag_from_fmt(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_diag_from_fmt + end subroutine psb_z_cuda_mv_diag_from_fmt end interface interface - subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_csmv + end subroutine psb_z_cuda_diag_csmv end interface interface - subroutine psb_z_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_csmm + end subroutine psb_z_cuda_diag_csmm end interface interface - subroutine psb_z_diag_scal(d,a,info, side) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_diag_scal(d,a,info, side) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_diag_scal + end subroutine psb_z_cuda_diag_scal end interface interface - subroutine psb_z_diag_scals(d,a,info) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_diag_scals(d,a,info) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_diag_scals + end subroutine psb_z_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function z_diag_sizeof(a) result(res) + function z_cuda_diag_sizeof(a) result(res) implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_diag_sizeof + end function z_cuda_diag_sizeof - function z_diag_get_fmt() result(res) + function z_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function z_diag_get_fmt + end function z_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine z_diag_free(a) + subroutine z_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine z_diag_free + end subroutine z_cuda_diag_free - subroutine z_diag_finalize(a) + subroutine z_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_z_diag_sparse_mat), intent(inout) :: a + type(psb_z_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_diag_finalize + end subroutine z_cuda_diag_finalize #else interface - subroutine psb_z_diag_mold(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_mold(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_diag_mold + end subroutine psb_z_cuda_diag_mold end interface #endif -end module psb_z_diag_mat_mod +end module psb_z_cuda_diag_mat_mod diff --git a/cuda/psb_z_dnsg_mat_mod.F90 b/cuda/psb_z_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_z_dnsg_mat_mod.F90 rename to cuda/psb_z_cuda_dnsg_mat_mod.F90 index 6a3d4369..3fb2488b 100644 --- a/cuda/psb_z_dnsg_mat_mod.F90 +++ b/cuda/psb_z_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_z_dnsg_mat_mod +module psb_z_cuda_dnsg_mat_mod use iso_c_binding use psb_z_mat_mod use psb_z_dns_mat_mod use dnsdev_mod - type, extends(psb_z_dns_sparse_mat) :: psb_z_dnsg_sparse_mat + type, extends(psb_z_dns_sparse_mat) :: psb_z_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_z_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => z_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => z_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_z_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_z_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_z_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_z_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_z_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_z_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_z_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_z_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => z_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_z_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_z_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_z_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_z_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_z_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_z_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_z_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_z_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_dnsg_from_fmt - procedure, pass(a) :: free => z_dnsg_free - procedure, pass(a) :: mold => psb_z_dnsg_mold - procedure, pass(a) :: to_gpu => psb_z_dnsg_to_gpu - final :: z_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => z_cuda_dnsg_free + procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_dnsg_to_gpu + final :: z_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_z_dnsg_mold + procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold #endif - end type psb_z_dnsg_sparse_mat + end type psb_z_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: z_dnsg_get_nzeros, z_dnsg_free, z_dnsg_get_fmt, & - & z_dnsg_get_size, z_dnsg_get_nz_row + private :: z_cuda_dnsg_get_nzeros, z_cuda_dnsg_free, z_cuda_dnsg_get_fmt, & + & z_cuda_dnsg_get_size, z_cuda_dnsg_get_nz_row interface - subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_dnsg_vect_mv + end subroutine psb_z_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_z_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_z_dnsg_mold(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_dnsg_mold(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_dnsg_mold + end subroutine psb_z_cuda_dnsg_mold end interface interface - subroutine psb_z_dnsg_to_gpu(a,info) - import :: psb_z_dnsg_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_dnsg_to_gpu(a,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_dnsg_to_gpu + end subroutine psb_z_cuda_dnsg_to_gpu end interface interface - subroutine psb_z_cp_dnsg_from_coo(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_dnsg_from_coo + end subroutine psb_z_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_z_cp_dnsg_from_fmt(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_dnsg_from_fmt + end subroutine psb_z_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_z_mv_dnsg_from_coo(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_dnsg_from_coo + end subroutine psb_z_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_z_mv_dnsg_from_fmt(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_dnsg_from_fmt + end subroutine psb_z_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_z_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) !!$ complex(psb_dpk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_csmv +!!$ end subroutine psb_z_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_z_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_csmm +!!$ end subroutine psb_z_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_scal(d,a,info, side) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_z_dnsg_scal +!!$ end subroutine psb_z_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_scals(d,a,info) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_dnsg_scals +!!$ end subroutine psb_z_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function z_dnsg_get_fmt() result(res) + function z_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function z_dnsg_get_fmt + end function z_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine z_dnsg_free(a) + subroutine z_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine z_dnsg_free + end subroutine z_cuda_dnsg_free - subroutine z_dnsg_finalize(a) + subroutine z_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_z_dnsg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_dnsg_finalize + end subroutine z_cuda_dnsg_finalize #else interface - subroutine psb_z_dnsg_mold(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_dnsg_mold(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_dnsg_mold + end subroutine psb_z_cuda_dnsg_mold end interface #endif -end module psb_z_dnsg_mat_mod +end module psb_z_cuda_dnsg_mat_mod diff --git a/cuda/psb_z_elg_mat_mod.F90 b/cuda/psb_z_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_z_elg_mat_mod.F90 rename to cuda/psb_z_cuda_elg_mat_mod.F90 index cf9e479c..9090b0a2 100644 --- a/cuda/psb_z_elg_mat_mod.F90 +++ b/cuda/psb_z_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_z_elg_mat_mod +module psb_z_cuda_elg_mat_mod use iso_c_binding use psb_z_mat_mod use psb_z_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_ell_sparse_mat) :: psb_z_elg_sparse_mat + type, extends(psb_z_ell_sparse_mat) :: psb_z_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_z_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => z_elg_get_fmt - procedure, pass(a) :: sizeof => z_elg_sizeof - procedure, pass(a) :: vect_mv => psb_z_elg_vect_mv - procedure, pass(a) :: csmm => psb_z_elg_csmm - procedure, pass(a) :: csmv => psb_z_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_z_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_z_elg_scals - procedure, pass(a) :: scalv => psb_z_elg_scal - procedure, pass(a) :: reallocate_nz => psb_z_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_elg_allocate_mnnz - procedure, pass(a) :: reinit => z_elg_reinit + procedure, nopass :: get_fmt => z_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_z_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_cuda_elg_scals + procedure, pass(a) :: scalv => psb_z_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => z_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_elg_from_fmt - procedure, pass(a) :: free => z_elg_free - procedure, pass(a) :: mold => psb_z_elg_mold - procedure, pass(a) :: csput_a => psb_z_elg_csput_a - procedure, pass(a) :: csput_v => psb_z_elg_csput_v - procedure, pass(a) :: is_host => z_elg_is_host - procedure, pass(a) :: is_dev => z_elg_is_dev - procedure, pass(a) :: is_sync => z_elg_is_sync - procedure, pass(a) :: set_host => z_elg_set_host - procedure, pass(a) :: set_dev => z_elg_set_dev - procedure, pass(a) :: set_sync => z_elg_set_sync - procedure, pass(a) :: sync => z_elg_sync - procedure, pass(a) :: from_gpu => psb_z_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_z_elg_to_gpu - procedure, pass(a) :: asb => psb_z_elg_asb - final :: z_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => z_cuda_elg_free + procedure, pass(a) :: mold => psb_z_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_z_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_z_cuda_elg_csput_v + procedure, pass(a) :: is_host => z_cuda_elg_is_host + procedure, pass(a) :: is_dev => z_cuda_elg_is_dev + procedure, pass(a) :: is_sync => z_cuda_elg_is_sync + procedure, pass(a) :: set_host => z_cuda_elg_set_host + procedure, pass(a) :: set_dev => z_cuda_elg_set_dev + procedure, pass(a) :: set_sync => z_cuda_elg_set_sync + procedure, pass(a) :: sync => z_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_z_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_z_cuda_elg_asb + final :: z_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_z_elg_mold - procedure, pass(a) :: asb => psb_z_elg_asb + procedure, pass(a) :: mold => psb_z_cuda_elg_mold + procedure, pass(a) :: asb => psb_z_cuda_elg_asb #endif - end type psb_z_elg_sparse_mat + end type psb_z_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: z_elg_get_nzeros, z_elg_free, z_elg_get_fmt, & - & z_elg_get_size, z_elg_sizeof, z_elg_get_nz_row, z_elg_sync + private :: z_cuda_elg_get_nzeros, z_cuda_elg_free, z_cuda_elg_get_fmt, & + & z_cuda_elg_get_size, z_cuda_elg_sizeof, z_cuda_elg_get_nz_row, z_cuda_elg_sync interface - subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_vect_mv + end subroutine psb_z_cuda_elg_vect_mv end interface interface - subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_inner_vect_sv + end subroutine psb_z_cuda_elg_inner_vect_sv end interface interface - subroutine psb_z_elg_reallocate_nz(nz,a) - import :: psb_z_elg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_elg_reallocate_nz(nz,a) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_elg_reallocate_nz + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_reallocate_nz end interface interface - subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) - import :: psb_z_elg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_elg_allocate_mnnz + end subroutine psb_z_cuda_elg_allocate_mnnz end interface interface - subroutine psb_z_elg_mold(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_mold(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_mold + end subroutine psb_z_cuda_elg_mold end interface interface - subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_csput_a + end subroutine psb_z_cuda_elg_csput_a end interface interface - subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,& + subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,& & psb_i_base_vect_type - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_csput_v + end subroutine psb_z_cuda_elg_csput_v end interface interface - subroutine psb_z_elg_from_gpu(a,info) - import :: psb_z_elg_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_from_gpu(a,info) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_from_gpu + end subroutine psb_z_cuda_elg_from_gpu end interface interface - subroutine psb_z_elg_to_gpu(a,info, nzrm) - import :: psb_z_elg_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_elg_to_gpu + end subroutine psb_z_cuda_elg_to_gpu end interface interface - subroutine psb_z_cp_elg_from_coo(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_elg_from_coo + end subroutine psb_z_cuda_cp_elg_from_coo end interface interface - subroutine psb_z_cp_elg_from_fmt(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_elg_from_fmt + end subroutine psb_z_cuda_cp_elg_from_fmt end interface interface - subroutine psb_z_mv_elg_from_coo(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_elg_from_coo + end subroutine psb_z_cuda_mv_elg_from_coo end interface interface - subroutine psb_z_mv_elg_from_fmt(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_elg_from_fmt + end subroutine psb_z_cuda_mv_elg_from_fmt end interface interface - subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_csmv + end subroutine psb_z_cuda_elg_csmv end interface interface - subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_csmm + end subroutine psb_z_cuda_elg_csmm end interface interface - subroutine psb_z_elg_scal(d,a,info, side) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_scal(d,a,info, side) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_elg_scal + end subroutine psb_z_cuda_elg_scal end interface interface - subroutine psb_z_elg_scals(d,a,info) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_scals(d,a,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_scals + end subroutine psb_z_cuda_elg_scals end interface interface - subroutine psb_z_elg_asb(a) - import :: psb_z_elg_sparse_mat - class(psb_z_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_elg_asb + subroutine psb_z_cuda_elg_asb(a) + import :: psb_z_cuda_elg_sparse_mat + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function z_elg_sizeof(a) result(res) + function z_cuda_elg_sizeof(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_elg_sizeof + end function z_cuda_elg_sizeof - function z_elg_get_fmt() result(res) + function z_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function z_elg_get_fmt + end function z_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine z_elg_reinit(a,clear) + subroutine z_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine z_elg_reinit + end subroutine z_cuda_elg_reinit - subroutine z_elg_free(a) + subroutine z_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine z_elg_free + end subroutine z_cuda_elg_free - subroutine z_elg_sync(a) + subroutine z_cuda_elg_sync(a) implicit none - class(psb_z_elg_sparse_mat), target, intent(in) :: a - class(psb_z_elg_sparse_mat), pointer :: tmpa + class(psb_z_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine z_elg_sync + end subroutine z_cuda_elg_sync - subroutine z_elg_set_host(a) + subroutine z_cuda_elg_set_host(a) implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine z_elg_set_host + end subroutine z_cuda_elg_set_host - subroutine z_elg_set_dev(a) + subroutine z_cuda_elg_set_dev(a) implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine z_elg_set_dev + end subroutine z_cuda_elg_set_dev - subroutine z_elg_set_sync(a) + subroutine z_cuda_elg_set_sync(a) implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine z_elg_set_sync + end subroutine z_cuda_elg_set_sync - function z_elg_is_dev(a) result(res) + function z_cuda_elg_is_dev(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function z_elg_is_dev + end function z_cuda_elg_is_dev - function z_elg_is_host(a) result(res) + function z_cuda_elg_is_host(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function z_elg_is_host + end function z_cuda_elg_is_host - function z_elg_is_sync(a) result(res) + function z_cuda_elg_is_sync(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function z_elg_is_sync + end function z_cuda_elg_is_sync - subroutine z_elg_finalize(a) + subroutine z_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_z_elg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_elg_finalize + end subroutine z_cuda_elg_finalize #else interface - subroutine psb_z_elg_asb(a) - import :: psb_z_elg_sparse_mat - class(psb_z_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_elg_asb + subroutine psb_z_cuda_elg_asb(a) + import :: psb_z_cuda_elg_sparse_mat + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_asb end interface interface - subroutine psb_z_elg_mold(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_mold(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_mold + end subroutine psb_z_cuda_elg_mold end interface #endif -end module psb_z_elg_mat_mod +end module psb_z_cuda_elg_mat_mod diff --git a/cuda/psb_z_hdiag_mat_mod.F90 b/cuda/psb_z_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_z_hdiag_mat_mod.F90 rename to cuda/psb_z_cuda_hdiag_mat_mod.F90 index 0b61cb47..b64498f6 100644 --- a/cuda/psb_z_hdiag_mat_mod.F90 +++ b/cuda/psb_z_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_z_hdiag_mat_mod +module psb_z_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_z_hdia_mat_mod - type, extends(psb_z_hdia_sparse_mat) :: psb_z_hdiag_sparse_mat + type, extends(psb_z_hdia_sparse_mat) :: psb_z_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => z_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => z_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_z_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_z_hdiag_csmm - procedure, pass(a) :: csmv => psb_z_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_z_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_z_hdiag_scals - ! procedure, pass(a) :: scalv => psb_z_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_z_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_z_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => z_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_z_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_z_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_z_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_z_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_z_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_z_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_hdiag_from_fmt - procedure, pass(a) :: free => z_hdiag_free - procedure, pass(a) :: mold => psb_z_hdiag_mold - procedure, pass(a) :: to_gpu => psb_z_hdiag_to_gpu - final :: z_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => z_cuda_hdiag_free + procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_hdiag_to_gpu + final :: z_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_z_hdiag_mold + procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold #endif - end type psb_z_hdiag_sparse_mat + end type psb_z_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: z_hdiag_get_nzeros, z_hdiag_free, z_hdiag_get_fmt, & - & z_hdiag_get_size, z_hdiag_sizeof, z_hdiag_get_nz_row + private :: z_cuda_hdiag_get_nzeros, z_cuda_hdiag_free, z_cuda_hdiag_get_fmt, & + & z_cuda_hdiag_get_size, z_cuda_hdiag_sizeof, z_cuda_hdiag_get_nz_row interface - subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hdiag_vect_mv + end subroutine psb_z_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_z_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type -!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_z_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_hdiag_inner_vect_sv +!!$ end subroutine psb_z_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_z_hdiag_reallocate_nz +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_z_hdiag_allocate_mnnz +!!$ end subroutine psb_z_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_z_hdiag_mold(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_mold(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hdiag_mold + end subroutine psb_z_cuda_hdiag_mold end interface interface - subroutine psb_z_hdiag_to_gpu(a,info) - import :: psb_z_hdiag_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hdiag_to_gpu(a,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hdiag_to_gpu + end subroutine psb_z_cuda_hdiag_to_gpu end interface interface - subroutine psb_z_cp_hdiag_from_coo(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hdiag_from_coo + end subroutine psb_z_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_z_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_z_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_cp_hdiag_from_fmt +!!$ end subroutine psb_z_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_z_mv_hdiag_from_coo(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hdiag_from_coo + end subroutine psb_z_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_z_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_z_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_mv_hdiag_from_fmt +!!$ end subroutine psb_z_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hdiag_csmv + end subroutine psb_z_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_z_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_hdiag_csmm +!!$ end subroutine psb_z_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_scal(d,a,info, side) -!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_z_hdiag_scal +!!$ end subroutine psb_z_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_scals(d,a,info) -!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_hdiag_scals +!!$ end subroutine psb_z_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function z_hdiag_get_fmt() result(res) + function z_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function z_hdiag_get_fmt + end function z_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine z_hdiag_free(a) + subroutine z_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine z_hdiag_free + end subroutine z_cuda_hdiag_free - subroutine z_hdiag_finalize(a) + subroutine z_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_z_hdiag_sparse_mat), intent(inout) :: a + type(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_z_hdia_sparse_mat%free() return - end subroutine z_hdiag_finalize + end subroutine z_cuda_hdiag_finalize #else interface - subroutine psb_z_hdiag_mold(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_mold(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hdiag_mold + end subroutine psb_z_cuda_hdiag_mold end interface #endif -end module psb_z_hdiag_mat_mod +end module psb_z_cuda_hdiag_mat_mod diff --git a/cuda/psb_z_hlg_mat_mod.F90 b/cuda/psb_z_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_z_hlg_mat_mod.F90 rename to cuda/psb_z_cuda_hlg_mat_mod.F90 index 09d490b3..29ed68fa 100644 --- a/cuda/psb_z_hlg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_z_hlg_mat_mod +module psb_z_cuda_hlg_mat_mod use iso_c_binding use psb_z_mat_mod @@ -41,7 +41,7 @@ module psb_z_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_hll_sparse_mat) :: psb_z_hlg_sparse_mat + type, extends(psb_z_hll_sparse_mat) :: psb_z_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_z_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => z_hlg_get_fmt - procedure, pass(a) :: sizeof => z_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_z_hlg_vect_mv - procedure, pass(a) :: csmm => psb_z_hlg_csmm - procedure, pass(a) :: csmv => psb_z_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_z_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_z_hlg_scals - procedure, pass(a) :: scalv => psb_z_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_z_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_hlg_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_z_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_z_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_hlg_from_fmt - procedure, pass(a) :: free => z_hlg_free - procedure, pass(a) :: mold => psb_z_hlg_mold - procedure, pass(a) :: is_host => z_hlg_is_host - procedure, pass(a) :: is_dev => z_hlg_is_dev - procedure, pass(a) :: is_sync => z_hlg_is_sync - procedure, pass(a) :: set_host => z_hlg_set_host - procedure, pass(a) :: set_dev => z_hlg_set_dev - procedure, pass(a) :: set_sync => z_hlg_set_sync - procedure, pass(a) :: sync => z_hlg_sync - procedure, pass(a) :: from_gpu => psb_z_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_z_hlg_to_gpu - final :: z_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => z_cuda_hlg_free + procedure, pass(a) :: mold => psb_z_cuda_hlg_mold + procedure, pass(a) :: is_host => z_cuda_hlg_is_host + procedure, pass(a) :: is_dev => z_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => z_cuda_hlg_is_sync + procedure, pass(a) :: set_host => z_cuda_hlg_set_host + procedure, pass(a) :: set_dev => z_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => z_cuda_hlg_set_sync + procedure, pass(a) :: sync => z_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_z_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_cuda_hlg_to_gpu + final :: z_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_z_hlg_mold + procedure, pass(a) :: mold => psb_z_cuda_hlg_mold #endif - end type psb_z_hlg_sparse_mat + end type psb_z_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: z_hlg_get_nzeros, z_hlg_free, z_hlg_get_fmt, & - & z_hlg_get_size, z_hlg_sizeof, z_hlg_get_nz_row + private :: z_cuda_hlg_get_nzeros, z_cuda_hlg_free, z_cuda_hlg_get_fmt, & + & z_cuda_hlg_get_size, z_cuda_hlg_sizeof, z_cuda_hlg_get_nz_row interface - subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_vect_mv + end subroutine psb_z_cuda_hlg_vect_mv end interface interface - subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_inner_vect_sv + end subroutine psb_z_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_z_hlg_reallocate_nz(nz,a) - import :: psb_z_hlg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_z_hlg_reallocate_nz + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_hlg_reallocate_nz end interface interface - subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_z_hlg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_hlg_allocate_mnnz + end subroutine psb_z_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_z_hlg_mold(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_mold(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_mold + end subroutine psb_z_cuda_hlg_mold end interface interface - subroutine psb_z_hlg_from_gpu(a,info) - import :: psb_z_hlg_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_from_gpu(a,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_from_gpu + end subroutine psb_z_cuda_hlg_from_gpu end interface interface - subroutine psb_z_hlg_to_gpu(a,info, nzrm) - import :: psb_z_hlg_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_hlg_to_gpu + end subroutine psb_z_cuda_hlg_to_gpu end interface interface - subroutine psb_z_cp_hlg_from_coo(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hlg_from_coo + end subroutine psb_z_cuda_cp_hlg_from_coo end interface interface - subroutine psb_z_cp_hlg_from_fmt(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hlg_from_fmt + end subroutine psb_z_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_z_mv_hlg_from_coo(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hlg_from_coo + end subroutine psb_z_cuda_mv_hlg_from_coo end interface interface - subroutine psb_z_mv_hlg_from_fmt(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hlg_from_fmt + end subroutine psb_z_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_csmv + end subroutine psb_z_cuda_hlg_csmv end interface interface - subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_csmm + end subroutine psb_z_cuda_hlg_csmm end interface interface - subroutine psb_z_hlg_scal(d,a,info, side) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_scal(d,a,info, side) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_hlg_scal + end subroutine psb_z_cuda_hlg_scal end interface interface - subroutine psb_z_hlg_scals(d,a,info) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_scals(d,a,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_scals + end subroutine psb_z_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function z_hlg_sizeof(a) result(res) + function z_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_hlg_sizeof + end function z_cuda_hlg_sizeof - function z_hlg_get_fmt() result(res) + function z_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function z_hlg_get_fmt + end function z_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine z_hlg_free(a) + subroutine z_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine z_hlg_free + end subroutine z_cuda_hlg_free - subroutine z_hlg_sync(a) + subroutine z_cuda_hlg_sync(a) implicit none - class(psb_z_hlg_sparse_mat), target, intent(in) :: a - class(psb_z_hlg_sparse_mat), pointer :: tmpa + class(psb_z_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine z_hlg_sync + end subroutine z_cuda_hlg_sync - subroutine z_hlg_set_host(a) + subroutine z_cuda_hlg_set_host(a) implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine z_hlg_set_host + end subroutine z_cuda_hlg_set_host - subroutine z_hlg_set_dev(a) + subroutine z_cuda_hlg_set_dev(a) implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine z_hlg_set_dev + end subroutine z_cuda_hlg_set_dev - subroutine z_hlg_set_sync(a) + subroutine z_cuda_hlg_set_sync(a) implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine z_hlg_set_sync + end subroutine z_cuda_hlg_set_sync - function z_hlg_is_dev(a) result(res) + function z_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function z_hlg_is_dev + end function z_cuda_hlg_is_dev - function z_hlg_is_host(a) result(res) + function z_cuda_hlg_is_host(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function z_hlg_is_host + end function z_cuda_hlg_is_host - function z_hlg_is_sync(a) result(res) + function z_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function z_hlg_is_sync + end function z_cuda_hlg_is_sync - subroutine z_hlg_finalize(a) + subroutine z_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_z_hlg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_hlg_finalize + end subroutine z_cuda_hlg_finalize #else interface - subroutine psb_z_hlg_mold(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_mold(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_mold + end subroutine psb_z_cuda_hlg_mold end interface #endif -end module psb_z_hlg_mat_mod +end module psb_z_cuda_hlg_mat_mod diff --git a/cuda/psb_z_hybg_mat_mod.F90 b/cuda/psb_z_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_z_hybg_mat_mod.F90 rename to cuda/psb_z_cuda_hybg_mat_mod.F90 index 465677e3..1bbc11b2 100644 --- a/cuda/psb_z_hybg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_z_hybg_mat_mod +module psb_z_cuda_hybg_mat_mod use iso_c_binding use psb_z_mat_mod use cusparse_mod - type, extends(psb_z_csr_sparse_mat) :: psb_z_hybg_sparse_mat + type, extends(psb_z_csr_sparse_mat) :: psb_z_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_z_hybg_mat_mod type(z_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => z_hybg_get_fmt - procedure, pass(a) :: sizeof => z_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_z_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_z_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_hybg_csmm - procedure, pass(a) :: csmv => psb_z_hybg_csmv - procedure, pass(a) :: scals => psb_z_hybg_scals - procedure, pass(a) :: scalv => psb_z_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_z_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_hybg_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_z_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_z_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_hybg_from_fmt - procedure, pass(a) :: free => z_hybg_free - procedure, pass(a) :: mold => psb_z_hybg_mold - procedure, pass(a) :: to_gpu => psb_z_hybg_to_gpu - final :: z_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => z_cuda_hybg_free + procedure, pass(a) :: mold => psb_z_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_hybg_to_gpu + final :: z_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_z_hybg_mold + procedure, pass(a) :: mold => psb_z_cuda_hybg_mold #endif - end type psb_z_hybg_sparse_mat + end type psb_z_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: z_hybg_get_nzeros, z_hybg_free, z_hybg_get_fmt, & - & z_hybg_get_size, z_hybg_sizeof, z_hybg_get_nz_row + private :: z_cuda_hybg_get_nzeros, z_cuda_hybg_free, z_cuda_hybg_get_fmt, & + & z_cuda_hybg_get_size, z_cuda_hybg_sizeof, z_cuda_hybg_get_nz_row interface - subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_inner_vect_sv + end subroutine psb_z_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_vect_mv + end subroutine psb_z_cuda_hybg_vect_mv end interface interface - subroutine psb_z_hybg_reallocate_nz(nz,a) - import :: psb_z_hybg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_z_hybg_reallocate_nz + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_hybg_reallocate_nz end interface interface - subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_z_hybg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_hybg_allocate_mnnz + end subroutine psb_z_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_z_hybg_mold(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_mold(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hybg_mold + end subroutine psb_z_cuda_hybg_mold end interface interface - subroutine psb_z_hybg_to_gpu(a,info, nzrm) - import :: psb_z_hybg_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_hybg_to_gpu + end subroutine psb_z_cuda_hybg_to_gpu end interface interface - subroutine psb_z_cp_hybg_from_coo(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hybg_from_coo + end subroutine psb_z_cuda_cp_hybg_from_coo end interface interface - subroutine psb_z_cp_hybg_from_fmt(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hybg_from_fmt + end subroutine psb_z_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_z_mv_hybg_from_coo(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hybg_from_coo + end subroutine psb_z_cuda_mv_hybg_from_coo end interface interface - subroutine psb_z_mv_hybg_from_fmt(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hybg_from_fmt + end subroutine psb_z_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_csmv + end subroutine psb_z_cuda_hybg_csmv end interface interface - subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_csmm + end subroutine psb_z_cuda_hybg_csmm end interface interface - subroutine psb_z_hybg_scal(d,a,info,side) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hybg_scal(d,a,info,side) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_hybg_scal + end subroutine psb_z_cuda_hybg_scal end interface interface - subroutine psb_z_hybg_scals(d,a,info) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hybg_scals(d,a,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hybg_scals + end subroutine psb_z_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function z_hybg_sizeof(a) result(res) + function z_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + (2*psb_sizeof_dp) * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_hybg_sizeof + end function z_cuda_hybg_sizeof - function z_hybg_get_fmt() result(res) + function z_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function z_hybg_get_fmt + end function z_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine z_hybg_free(a) + subroutine z_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_z_csr_sparse_mat%free() return - end subroutine z_hybg_free + end subroutine z_cuda_hybg_free - subroutine z_hybg_finalize(a) + subroutine z_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_z_hybg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine z_hybg_finalize + end subroutine z_cuda_hybg_finalize #else interface - subroutine psb_z_hybg_mold(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_mold(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hybg_mold + end subroutine psb_z_cuda_hybg_mold end interface #endif -end module psb_z_hybg_mat_mod +end module psb_z_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_z_gpu_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_z_gpu_vect_mod.F90 rename to cuda/psb_z_cuda_vect_mod.F90 index ca5ac922..35bfb4b5 100644 --- a/cuda/psb_z_gpu_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_z_gpu_vect_mod +module psb_z_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_z_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_z_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_z_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_base_vect_type) :: psb_z_vect_gpu + type, extends(psb_z_base_vect_type) :: psb_z_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_z_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => z_gpu_get_nrows - procedure, nopass :: get_fmt => z_gpu_get_fmt - - procedure, pass(x) :: all => z_gpu_all - procedure, pass(x) :: zero => z_gpu_zero - procedure, pass(x) :: asb_m => z_gpu_asb_m - procedure, pass(x) :: sync => z_gpu_sync - procedure, pass(x) :: sync_space => z_gpu_sync_space - procedure, pass(x) :: bld_x => z_gpu_bld_x - procedure, pass(x) :: bld_mn => z_gpu_bld_mn - procedure, pass(x) :: free => z_gpu_free - procedure, pass(x) :: ins_a => z_gpu_ins_a - procedure, pass(x) :: ins_v => z_gpu_ins_v - procedure, pass(x) :: is_host => z_gpu_is_host - procedure, pass(x) :: is_dev => z_gpu_is_dev - procedure, pass(x) :: is_sync => z_gpu_is_sync - procedure, pass(x) :: set_host => z_gpu_set_host - procedure, pass(x) :: set_dev => z_gpu_set_dev - procedure, pass(x) :: set_sync => z_gpu_set_sync - procedure, pass(x) :: set_scal => z_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => z_gpu_set_vect - procedure, pass(x) :: gthzv_x => z_gpu_gthzv_x - procedure, pass(y) :: sctb => z_gpu_sctb - procedure, pass(y) :: sctb_x => z_gpu_sctb_x - procedure, pass(x) :: gthzbuf => z_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => z_gpu_sctb_buf - procedure, pass(x) :: new_buffer => z_gpu_new_buffer - procedure, nopass :: device_wait => z_gpu_device_wait - procedure, pass(x) :: free_buffer => z_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => z_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => z_gpu_dot_v - procedure, pass(x) :: dot_a => z_gpu_dot_a - procedure, pass(y) :: axpby_v => z_gpu_axpby_v - procedure, pass(y) :: axpby_a => z_gpu_axpby_a - procedure, pass(y) :: mlt_v => z_gpu_mlt_v - procedure, pass(y) :: mlt_a => z_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => z_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => z_gpu_mlt_v_2 - procedure, pass(x) :: scal => z_gpu_scal - procedure, pass(x) :: nrm2 => z_gpu_nrm2 - procedure, pass(x) :: amax => z_gpu_amax - procedure, pass(x) :: asum => z_gpu_asum - procedure, pass(x) :: absval1 => z_gpu_absval1 - procedure, pass(x) :: absval2 => z_gpu_absval2 - - final :: z_gpu_vect_finalize + procedure, pass(x) :: get_nrows => z_cuda_get_nrows + procedure, nopass :: get_fmt => z_cuda_get_fmt + + procedure, pass(x) :: all => z_cuda_all + procedure, pass(x) :: zero => z_cuda_zero + procedure, pass(x) :: asb_m => z_cuda_asb_m + procedure, pass(x) :: sync => z_cuda_sync + procedure, pass(x) :: sync_space => z_cuda_sync_space + procedure, pass(x) :: bld_x => z_cuda_bld_x + procedure, pass(x) :: bld_mn => z_cuda_bld_mn + procedure, pass(x) :: free => z_cuda_free + procedure, pass(x) :: ins_a => z_cuda_ins_a + procedure, pass(x) :: ins_v => z_cuda_ins_v + procedure, pass(x) :: is_host => z_cuda_is_host + procedure, pass(x) :: is_dev => z_cuda_is_dev + procedure, pass(x) :: is_sync => z_cuda_is_sync + procedure, pass(x) :: set_host => z_cuda_set_host + procedure, pass(x) :: set_dev => z_cuda_set_dev + procedure, pass(x) :: set_sync => z_cuda_set_sync + procedure, pass(x) :: set_scal => z_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => z_cuda_set_vect + procedure, pass(x) :: gthzv_x => z_cuda_gthzv_x + procedure, pass(y) :: sctb => z_cuda_sctb + procedure, pass(y) :: sctb_x => z_cuda_sctb_x + procedure, pass(x) :: gthzbuf => z_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => z_cuda_sctb_buf + procedure, pass(x) :: new_buffer => z_cuda_new_buffer + procedure, nopass :: device_wait => z_cuda_device_wait + procedure, pass(x) :: free_buffer => z_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => z_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => z_cuda_dot_v + procedure, pass(x) :: dot_a => z_cuda_dot_a + procedure, pass(y) :: axpby_v => z_cuda_axpby_v + procedure, pass(y) :: axpby_a => z_cuda_axpby_a + procedure, pass(y) :: mlt_v => z_cuda_mlt_v + procedure, pass(y) :: mlt_a => z_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => z_cuda_mlt_v_2 + procedure, pass(x) :: scal => z_cuda_scal + procedure, pass(x) :: nrm2 => z_cuda_nrm2 + procedure, pass(x) :: amax => z_cuda_amax + procedure, pass(x) :: asum => z_cuda_asum + procedure, pass(x) :: absval1 => z_cuda_absval1 + procedure, pass(x) :: absval2 => z_cuda_absval2 + + final :: z_cuda_vect_finalize #endif - end type psb_z_vect_gpu + end type psb_z_vect_cuda - public :: psb_z_vect_gpu_ + public :: psb_z_vect_cuda_ private :: constructor - interface psb_z_vect_gpu_ + interface psb_z_vect_cuda_ module procedure constructor - end interface psb_z_vect_gpu_ + end interface psb_z_vect_cuda_ contains function constructor(x) result(this) complex(psb_dpk_) :: x(:) - type(psb_z_vect_gpu) :: this + type(psb_z_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine z_gpu_device_wait() + subroutine z_cuda_device_wait() call psb_cudaSync() - end subroutine z_gpu_device_wait + end subroutine z_cuda_device_wait - subroutine z_gpu_new_buffer(n,x,info) + subroutine z_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine z_gpu_gthzv_x + end subroutine z_cuda_gthzv_x - subroutine z_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine z_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_z_vect_gpu) :: x + class(psb_z_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine z_gpu_gthzbuf + end subroutine z_cuda_gthzbuf - subroutine z_gpu_sctb(n,idx,x,beta,y) + subroutine z_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: beta, x(:) - class(psb_z_vect_gpu) :: y + class(psb_z_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_z_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine z_gpu_sctb + end subroutine z_cuda_sctb - subroutine z_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine z_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta, x(:) - class(psb_z_vect_gpu) :: y + class(psb_z_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine z_gpu_sctb_x + end subroutine z_cuda_sctb_x - subroutine z_gpu_sctb_buf(i,n,idx,beta,y) + subroutine z_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta - class(psb_z_vect_gpu) :: y + class(psb_z_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine z_gpu_sctb_buf + end subroutine z_cuda_sctb_buf - subroutine z_gpu_bld_x(x,this) + subroutine z_cuda_bld_x(x,this) use psb_base_mod complex(psb_dpk_), intent(in) :: this(:) - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_bld_x',& + call psb_errpush(info,'z_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine z_gpu_bld_x + end subroutine z_cuda_bld_x - subroutine z_gpu_bld_mn(x,n) + subroutine z_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'z_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'z_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine z_gpu_bld_mn + end subroutine z_cuda_bld_mn - subroutine z_gpu_set_host(x) + subroutine z_cuda_set_host(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine z_gpu_set_host + end subroutine z_cuda_set_host - subroutine z_gpu_set_dev(x) + subroutine z_cuda_set_dev(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine z_gpu_set_dev + end subroutine z_cuda_set_dev - subroutine z_gpu_set_sync(x) + subroutine z_cuda_set_sync(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine z_gpu_set_sync + end subroutine z_cuda_set_sync - function z_gpu_is_dev(x) result(res) + function z_cuda_is_dev(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function z_gpu_is_dev + end function z_cuda_is_dev - function z_gpu_is_host(x) result(res) + function z_cuda_is_host(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function z_gpu_is_host + end function z_cuda_is_host - function z_gpu_is_sync(x) result(res) + function z_cuda_is_sync(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function z_gpu_is_sync + end function z_cuda_is_sync - function z_gpu_get_nrows(x) result(res) + function z_cuda_get_nrows(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function z_gpu_get_nrows + end function z_cuda_get_nrows - function z_gpu_get_fmt() result(res) + function z_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'zGPU' - end function z_gpu_get_fmt + end function z_cuda_get_fmt - subroutine z_gpu_all(n, x, info) + subroutine z_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_z_vect_gpu), intent(out) :: x + class(psb_z_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_all',& + call psb_errpush(info,'z_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine z_gpu_all + end subroutine z_cuda_all - subroutine z_gpu_zero(x) + subroutine z_cuda_zero(x) use psi_serial_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=zzero call x%set_host() - end subroutine z_gpu_zero + end subroutine z_cuda_zero - subroutine z_gpu_asb_m(n, x, info) + subroutine z_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine z_gpu_asb_m + end subroutine z_cuda_asb_m - subroutine z_gpu_sync_space(x,info) + subroutine z_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine z_gpu_sync_space + end subroutine z_cuda_sync_space - subroutine z_gpu_sync(x) + subroutine z_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'z_gpu_sync') + call psb_errpush(info,'z_cuda_sync') end if - end subroutine z_gpu_sync + end subroutine z_cuda_sync - subroutine z_gpu_free(x, info) + subroutine z_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine z_gpu_free + end subroutine z_cuda_free - subroutine z_gpu_set_scal(x,val,first,last) - class(psb_z_vect_gpu), intent(inout) :: x + subroutine z_cuda_set_scal(x,val,first,last) + class(psb_z_vect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine z_gpu_set_scal + end subroutine z_cuda_set_scal !!$ -!!$ subroutine z_gpu_set_vect(x,val) -!!$ class(psb_z_vect_gpu), intent(inout) :: x +!!$ subroutine z_cuda_set_vect(x,val) +!!$ class(psb_z_vect_cuda), intent(inout) :: x !!$ complex(psb_dpk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_z_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine z_gpu_set_vect +!!$ end subroutine z_cuda_set_vect - function z_gpu_dot_v(n,x,y) result(res) + function z_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n complex(psb_dpk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_z_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'z_gpu_dot_v') + call psb_errpush(info,'z_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function z_gpu_dot_v + end function z_cuda_dot_v - function z_gpu_dot_a(n,x,y) result(res) + function z_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n complex(psb_dpk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function z_gpu_dot_a + end function z_cuda_dot_a - subroutine z_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine z_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y complex(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) ! Do something different here if ((beta /= zzero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine z_gpu_axpby_v + end subroutine z_cuda_axpby_v - subroutine z_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine z_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m complex(psb_dpk_), intent(in) :: x(:) - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y complex(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine z_gpu_axpby_a + end subroutine z_cuda_axpby_a - subroutine z_gpu_mlt_v(x, y, info) + subroutine z_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine z_gpu_mlt_v + end subroutine z_cuda_mlt_v - subroutine z_gpu_mlt_a(x, y, info) + subroutine z_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none complex(psb_dpk_), intent(in) :: x(:) - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_z_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine z_gpu_mlt_a + end subroutine z_cuda_mlt_a - subroutine z_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine z_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none complex(psb_dpk_), intent(in) :: alpha,beta complex(psb_dpk_), intent(in) :: x(:) complex(psb_dpk_), intent(in) :: y(:) - class(psb_z_vect_gpu), intent(inout) :: z + class(psb_z_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine z_gpu_mlt_a_2 + end subroutine z_cuda_mlt_a_2 - subroutine z_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine z_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none complex(psb_dpk_), intent(in) :: alpha,beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y - class(psb_z_vect_gpu), intent(inout) :: z + class(psb_z_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type (yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= zzero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine z_gpu_mlt_v_2 + end subroutine z_cuda_mlt_v_2 - subroutine z_gpu_scal(alpha, x) + subroutine z_cuda_scal(alpha, x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x complex(psb_dpk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine z_gpu_scal + end subroutine z_cuda_scal - function z_gpu_nrm2(n,x) result(res) + function z_cuda_nrm2(n,x) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) - end function z_gpu_nrm2 + end function z_cuda_nrm2 - function z_gpu_amax(n,x) result(res) + function z_cuda_amax(n,x) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) - end function z_gpu_amax + end function z_cuda_amax - function z_gpu_asum(n,x) result(res) + function z_cuda_asum(n,x) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDeviceComplex(res,n,x%deviceVect) - end function z_gpu_asum + end function z_cuda_asum - subroutine z_gpu_absval1(x) + subroutine z_cuda_absval1(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,zone,x%deviceVect) - end subroutine z_gpu_absval1 + end subroutine z_cuda_absval1 - subroutine z_gpu_absval2(x,y) + subroutine z_cuda_absval2(x,y) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_z_vect_gpu) + class is (psb_z_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,zone,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_z_base_vect_type%absval(y) end select - end subroutine z_gpu_absval2 + end subroutine z_cuda_absval2 - subroutine z_gpu_vect_finalize(x) + subroutine z_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_z_vect_gpu), intent(inout) :: x + type(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine z_gpu_vect_finalize + end subroutine z_cuda_vect_finalize - subroutine z_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine z_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_z_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_z_vect_gpu) + class is (psb_z_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceDoubleComplex(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine z_gpu_ins_v + end subroutine z_cuda_ins_v - subroutine z_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine z_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_dpk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_z_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine z_gpu_ins_a + end subroutine z_cuda_ins_a #endif -end module psb_z_gpu_vect_mod +end module psb_z_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_z_gpu_vect_mod -module psb_z_gpu_multivect_mod +module psb_z_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_z_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_z_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_z_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_base_multivect_type) :: psb_z_multivect_gpu + type, extends(psb_z_base_multivect_type) :: psb_z_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_z_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => z_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => z_gpu_multi_get_ncols - procedure, nopass :: get_fmt => z_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => z_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => z_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => z_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => z_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => z_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => z_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => z_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => z_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => z_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => z_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => z_gpu_multi_amax -!!$ procedure, pass(x) :: asum => z_gpu_multi_asum - procedure, pass(x) :: all => z_gpu_multi_all - procedure, pass(x) :: zero => z_gpu_multi_zero - procedure, pass(x) :: asb => z_gpu_multi_asb - procedure, pass(x) :: sync => z_gpu_multi_sync - procedure, pass(x) :: sync_space => z_gpu_multi_sync_space - procedure, pass(x) :: bld_x => z_gpu_multi_bld_x - procedure, pass(x) :: bld_n => z_gpu_multi_bld_n - procedure, pass(x) :: free => z_gpu_multi_free - procedure, pass(x) :: ins => z_gpu_multi_ins - procedure, pass(x) :: is_host => z_gpu_multi_is_host - procedure, pass(x) :: is_dev => z_gpu_multi_is_dev - procedure, pass(x) :: is_sync => z_gpu_multi_is_sync - procedure, pass(x) :: set_host => z_gpu_multi_set_host - procedure, pass(x) :: set_dev => z_gpu_multi_set_dev - procedure, pass(x) :: set_sync => z_gpu_multi_set_sync - procedure, pass(x) :: set_scal => z_gpu_multi_set_scal - procedure, pass(x) :: set_vect => z_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => z_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => z_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => z_gpu_multi_sctb_x - final :: z_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => z_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => z_cuda_multi_get_ncols + procedure, nopass :: get_fmt => z_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => z_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => z_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => z_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => z_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => z_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => z_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => z_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => z_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => z_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => z_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => z_cuda_multi_amax +!!$ procedure, pass(x) :: asum => z_cuda_multi_asum + procedure, pass(x) :: all => z_cuda_multi_all + procedure, pass(x) :: zero => z_cuda_multi_zero + procedure, pass(x) :: asb => z_cuda_multi_asb + procedure, pass(x) :: sync => z_cuda_multi_sync + procedure, pass(x) :: sync_space => z_cuda_multi_sync_space + procedure, pass(x) :: bld_x => z_cuda_multi_bld_x + procedure, pass(x) :: bld_n => z_cuda_multi_bld_n + procedure, pass(x) :: free => z_cuda_multi_free + procedure, pass(x) :: ins => z_cuda_multi_ins + procedure, pass(x) :: is_host => z_cuda_multi_is_host + procedure, pass(x) :: is_dev => z_cuda_multi_is_dev + procedure, pass(x) :: is_sync => z_cuda_multi_is_sync + procedure, pass(x) :: set_host => z_cuda_multi_set_host + procedure, pass(x) :: set_dev => z_cuda_multi_set_dev + procedure, pass(x) :: set_sync => z_cuda_multi_set_sync + procedure, pass(x) :: set_scal => z_cuda_multi_set_scal + procedure, pass(x) :: set_vect => z_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => z_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => z_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => z_cuda_multi_sctb_x + final :: z_cuda_multi_vect_finalize #endif - end type psb_z_multivect_gpu + end type psb_z_multivect_cuda - public :: psb_z_multivect_gpu + public :: psb_z_multivect_cuda private :: constructor - interface psb_z_multivect_gpu + interface psb_z_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) complex(psb_dpk_) :: x(:,:) - type(psb_z_multivect_gpu) :: this + type(psb_z_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine z_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_dpk_) :: y(:) -!!$ class(psb_z_multivect_gpu) :: x +!!$ class(psb_z_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine z_gpu_multi_gthzv_x +!!$ end subroutine z_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine z_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine z_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ complex(psb_dpk_) :: beta, x(:) -!!$ class(psb_z_multivect_gpu) :: y +!!$ class(psb_z_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_z_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine z_gpu_multi_sctb +!!$ end subroutine z_cuda_multi_sctb !!$ -!!$ subroutine z_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine z_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_dpk_) :: beta, x(:) -!!$ class(psb_z_multivect_gpu) :: y +!!$ class(psb_z_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine z_gpu_multi_sctb_x +!!$ end subroutine z_cuda_multi_sctb_x - subroutine z_gpu_multi_bld_x(x,this) + subroutine z_cuda_multi_bld_x(x,this) use psb_base_mod complex(psb_dpk_), intent(in) :: this(:,:) - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_multi_bld_x',& + call psb_errpush(info,'z_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine z_gpu_multi_bld_x + end subroutine z_cuda_multi_bld_x - subroutine z_gpu_multi_bld_n(x,m,n) + subroutine z_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'z_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'z_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine z_gpu_multi_bld_n + end subroutine z_cuda_multi_bld_n - subroutine z_gpu_multi_set_host(x) + subroutine z_cuda_multi_set_host(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine z_gpu_multi_set_host + end subroutine z_cuda_multi_set_host - subroutine z_gpu_multi_set_dev(x) + subroutine z_cuda_multi_set_dev(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine z_gpu_multi_set_dev + end subroutine z_cuda_multi_set_dev - subroutine z_gpu_multi_set_sync(x) + subroutine z_cuda_multi_set_sync(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine z_gpu_multi_set_sync + end subroutine z_cuda_multi_set_sync - function z_gpu_multi_is_dev(x) result(res) + function z_cuda_multi_is_dev(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function z_gpu_multi_is_dev + end function z_cuda_multi_is_dev - function z_gpu_multi_is_host(x) result(res) + function z_cuda_multi_is_host(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function z_gpu_multi_is_host + end function z_cuda_multi_is_host - function z_gpu_multi_is_sync(x) result(res) + function z_cuda_multi_is_sync(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function z_gpu_multi_is_sync + end function z_cuda_multi_is_sync - function z_gpu_multi_get_nrows(x) result(res) + function z_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function z_gpu_multi_get_nrows + end function z_cuda_multi_get_nrows - function z_gpu_multi_get_ncols(x) result(res) + function z_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function z_gpu_multi_get_ncols + end function z_cuda_multi_get_ncols - function z_gpu_multi_get_fmt() result(res) + function z_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'zGPU' - end function z_gpu_multi_get_fmt + end function z_cuda_multi_get_fmt -!!$ function z_gpu_multi_dot_v(n,x,y) result(res) +!!$ function z_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ class(psb_z_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_dpk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_z_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'z_gpu_multi_dot_v') +!!$ call psb_errpush(info,'z_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function z_gpu_multi_dot_v +!!$ end function z_cuda_multi_dot_v !!$ -!!$ function z_gpu_multi_dot_a(n,x,y) result(res) +!!$ function z_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ complex(psb_dpk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_dpk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function z_gpu_multi_dot_a +!!$ end function z_cuda_multi_dot_a !!$ -!!$ subroutine z_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine z_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_z_base_multivect_type), intent(inout) :: x -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ complex(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine z_gpu_multi_axpby_v +!!$ end subroutine z_cuda_multi_axpby_v !!$ -!!$ subroutine z_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine z_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ complex(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ complex(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine z_gpu_multi_axpby_a +!!$ end subroutine z_cuda_multi_axpby_a !!$ -!!$ subroutine z_gpu_multi_mlt_v(x, y, info) +!!$ subroutine z_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_z_base_multivect_type), intent(inout) :: x -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine z_gpu_multi_mlt_v +!!$ end subroutine z_cuda_multi_mlt_v !!$ -!!$ subroutine z_gpu_multi_mlt_a(x, y, info) +!!$ subroutine z_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_z_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine z_gpu_multi_mlt_a +!!$ end subroutine z_cuda_multi_mlt_a !!$ -!!$ subroutine z_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine z_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta !!$ complex(psb_dpk_), intent(in) :: x(:) !!$ complex(psb_dpk_), intent(in) :: y(:) -!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ class(psb_z_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine z_gpu_multi_mlt_a_2 +!!$ end subroutine z_cuda_multi_mlt_a_2 !!$ -!!$ subroutine z_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine z_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta !!$ class(psb_z_base_multivect_type), intent(inout) :: x !!$ class(psb_z_base_multivect_type), intent(inout) :: y -!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ class(psb_z_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine z_gpu_multi_mlt_v_2 +!!$ end subroutine z_cuda_multi_mlt_v_2 - subroutine z_gpu_multi_set_scal(x,val) - class(psb_z_multivect_gpu), intent(inout) :: x + subroutine z_cuda_multi_set_scal(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_z_base_multivect_type%set_scal(val) call x%set_host() - end subroutine z_gpu_multi_set_scal + end subroutine z_cuda_multi_set_scal - subroutine z_gpu_multi_set_vect(x,val) - class(psb_z_multivect_gpu), intent(inout) :: x + subroutine z_cuda_multi_set_vect(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_z_base_multivect_type%set_vect(val) call x%set_host() - end subroutine z_gpu_multi_set_vect + end subroutine z_cuda_multi_set_vect -!!$ subroutine z_gpu_multi_scal(alpha, x) +!!$ subroutine z_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ complex(psb_dpk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_z_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine z_gpu_multi_scal +!!$ end subroutine z_cuda_multi_scal !!$ !!$ -!!$ function z_gpu_multi_nrm2(n,x) result(res) +!!$ function z_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function z_gpu_multi_nrm2 +!!$ end function z_cuda_multi_nrm2 !!$ -!!$ function z_gpu_multi_amax(n,x) result(res) +!!$ function z_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function z_gpu_multi_amax +!!$ end function z_cuda_multi_amax !!$ -!!$ function z_gpu_multi_asum(n,x) result(res) +!!$ function z_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function z_gpu_multi_asum +!!$ end function z_cuda_multi_asum - subroutine z_gpu_multi_all(m,n, x, info) + subroutine z_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_multivect_gpu), intent(out) :: x + class(psb_z_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=zzero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_multi_all',& + call psb_errpush(info,'z_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine z_gpu_multi_all + end subroutine z_cuda_multi_all - subroutine z_gpu_multi_zero(x) + subroutine z_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine z_gpu_multi_zero + end subroutine z_cuda_multi_zero - subroutine z_gpu_multi_asb(m,n, x, info) + subroutine z_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine z_gpu_multi_asb + end subroutine z_cuda_multi_asb - subroutine z_gpu_multi_sync_space(x,info) + subroutine z_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine z_gpu_multi_sync_space + end subroutine z_cuda_multi_sync_space - subroutine z_gpu_multi_sync(x) + subroutine z_cuda_multi_sync(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'z_gpu_multi_sync') + call psb_errpush(info,'z_cuda_multi_sync') end if - end subroutine z_gpu_multi_sync + end subroutine z_cuda_multi_sync - subroutine z_gpu_multi_free(x, info) + subroutine z_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine z_gpu_multi_free + end subroutine z_cuda_multi_free - subroutine z_gpu_multi_vect_finalize(x) + subroutine z_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_z_multivect_gpu), intent(inout) :: x + type(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine z_gpu_multi_vect_finalize + end subroutine z_cuda_multi_vect_finalize - subroutine z_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine z_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_dpk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_z_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine z_gpu_multi_ins + end subroutine z_cuda_multi_ins #endif -end module psb_z_gpu_multivect_mod +end module psb_z_cuda_multivect_mod diff --git a/test/gpukern/Makefile b/test/cudakern/Makefile similarity index 100% rename from test/gpukern/Makefile rename to test/cudakern/Makefile diff --git a/test/gpukern/c_file_spmv.F90 b/test/cudakern/c_file_spmv.F90 similarity index 100% rename from test/gpukern/c_file_spmv.F90 rename to test/cudakern/c_file_spmv.F90 diff --git a/test/gpukern/d_file_spmv.F90 b/test/cudakern/d_file_spmv.F90 similarity index 100% rename from test/gpukern/d_file_spmv.F90 rename to test/cudakern/d_file_spmv.F90 diff --git a/test/gpukern/data_input.f90 b/test/cudakern/data_input.f90 similarity index 100% rename from test/gpukern/data_input.f90 rename to test/cudakern/data_input.f90 diff --git a/test/gpukern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 similarity index 99% rename from test/gpukern/dpdegenmv.F90 rename to test/cudakern/dpdegenmv.F90 index ab616471..b8a2ba2c 100644 --- a/test/gpukern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -548,7 +548,7 @@ program pdgenmv use psb_util_mod use psb_ext_mod #ifdef HAVE_GPU - use psb_gpu_mod + use psb_cuda_mod #endif #ifdef HAVE_RSB use psb_rsb_mod @@ -619,7 +619,7 @@ program pdgenmv call psb_info(ctxt,iam,np) #ifdef HAVE_GPU - call psb_gpu_init(ctxt) + call psb_cuda_init(ctxt) #endif #ifdef HAVE_RSB call psb_rsb_init() @@ -641,7 +641,7 @@ program pdgenmv end if #ifdef HAVE_GPU write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif ! ! get parameters @@ -761,7 +761,7 @@ program pdgenmv call psb_barrier(ctxt) t1 = psb_wtime() call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() t2 = psb_Wtime() -t1 call psb_amx(ctxt,t2) if (j==1) tcnvg1 = t2 @@ -798,7 +798,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) tt2 = psb_wtime() - tt1 call psb_amx(ctxt,tt2) @@ -825,7 +825,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) gt2 = psb_wtime() - gt1 call psb_amx(ctxt,gt2) @@ -928,7 +928,7 @@ program pdgenmv #ifdef HAVE_GPU bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth - bdwdth = psb_gpu_MemoryPeakBandwidth() + bdwdth = psb_cuda_MemoryPeakBandwidth() write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth #endif write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() @@ -950,7 +950,7 @@ program pdgenmv goto 9999 end if #ifdef HAVE_GPU - call psb_gpu_exit() + call psb_cuda_exit() #endif call psb_exit(ctxt) stop diff --git a/test/gpukern/s_file_spmv.F90 b/test/cudakern/s_file_spmv.F90 similarity index 100% rename from test/gpukern/s_file_spmv.F90 rename to test/cudakern/s_file_spmv.F90 diff --git a/test/gpukern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90 similarity index 100% rename from test/gpukern/spdegenmv.F90 rename to test/cudakern/spdegenmv.F90 diff --git a/test/gpukern/z_file_spmv.F90 b/test/cudakern/z_file_spmv.F90 similarity index 100% rename from test/gpukern/z_file_spmv.F90 rename to test/cudakern/z_file_spmv.F90