From 2982aaee27b501e18b6a8125a263a19f03b65897 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 8 Aug 2024 11:07:28 +0200 Subject: [PATCH] Implementation in OpenACC for ELL and HLL into templates. Merge from development --- base/modules/auxil/psi_c_serial_mod.f90 | 8 +- base/modules/auxil/psi_d_serial_mod.f90 | 8 +- base/modules/auxil/psi_e_serial_mod.f90 | 8 +- base/modules/auxil/psi_i2_serial_mod.f90 | 8 +- base/modules/auxil/psi_m_serial_mod.f90 | 8 +- base/modules/auxil/psi_s_serial_mod.f90 | 8 +- base/modules/auxil/psi_z_serial_mod.f90 | 8 +- base/modules/psblas/psb_c_psblas_mod.F90 | 8 +- base/modules/psblas/psb_d_psblas_mod.F90 | 8 +- base/modules/psblas/psb_s_psblas_mod.F90 | 8 +- base/modules/psblas/psb_z_psblas_mod.F90 | 8 +- base/modules/serial/psb_c_base_vect_mod.F90 | 14 +- base/modules/serial/psb_c_vect_mod.F90 | 8 +- base/modules/serial/psb_d_base_vect_mod.F90 | 14 +- base/modules/serial/psb_d_vect_mod.F90 | 8 +- base/modules/serial/psb_s_base_vect_mod.F90 | 14 +- base/modules/serial/psb_s_vect_mod.F90 | 8 +- base/modules/serial/psb_z_base_vect_mod.F90 | 14 +- base/modules/serial/psb_z_vect_mod.F90 | 8 +- base/psblas/psb_caxpby.f90 | 8 +- base/psblas/psb_daxpby.f90 | 8 +- base/psblas/psb_saxpby.f90 | 8 +- base/psblas/psb_zaxpby.f90 | 8 +- base/serial/psi_c_serial_impl.F90 | 8 +- base/serial/psi_d_serial_impl.F90 | 8 +- base/serial/psi_e_serial_impl.F90 | 8 +- base/serial/psi_i2_serial_impl.F90 | 8 +- base/serial/psi_m_serial_impl.F90 | 8 +- base/serial/psi_s_serial_impl.F90 | 8 +- base/serial/psi_z_serial_impl.F90 | 8 +- cuda/psb_c_cuda_vect_mod.F90 | 8 +- cuda/psb_c_vectordev_mod.F90 | 8 +- cuda/psb_d_cuda_vect_mod.F90 | 8 +- cuda/psb_d_vectordev_mod.F90 | 8 +- cuda/psb_s_cuda_vect_mod.F90 | 8 +- cuda/psb_s_vectordev_mod.F90 | 8 +- cuda/psb_z_cuda_vect_mod.F90 | 8 +- cuda/psb_z_vectordev_mod.F90 | 8 +- openacc/Makefile | 27 +- openacc/impl/Makefile | 84 +- openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 | 47 + openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 | 78 ++ openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_ell_csmm.F90 | 86 ++ openacc/impl/psb_c_oacc_ell_csmv.F90 | 82 ++ openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 | 85 ++ openacc/impl/psb_c_oacc_ell_mold.F90 | 34 + openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 | 25 + openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 | 28 + openacc/impl/psb_c_oacc_ell_scal.F90 | 58 ++ openacc/impl/psb_c_oacc_ell_scals.F90 | 39 + openacc/impl/psb_c_oacc_ell_vect_mv.F90 | 66 ++ openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 | 53 + openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 | 85 ++ openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_hll_csmm.F90 | 86 ++ openacc/impl/psb_c_oacc_hll_csmv.F90 | 84 ++ openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 | 86 ++ openacc/impl/psb_c_oacc_hll_mold.F90 | 34 + openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 | 25 + openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 | 29 + openacc/impl/psb_c_oacc_hll_scal.F90 | 62 ++ openacc/impl/psb_c_oacc_hll_scals.F90 | 40 + openacc/impl/psb_c_oacc_hll_vect_mv.F90 | 67 ++ openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 | 2 +- openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 2 +- openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 | 2 +- openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 2 +- openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 | 47 + openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 | 78 ++ openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_ell_csmm.F90 | 86 ++ openacc/impl/psb_s_oacc_ell_csmv.F90 | 82 ++ openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 | 85 ++ openacc/impl/psb_s_oacc_ell_mold.F90 | 34 + openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 | 25 + openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 | 28 + openacc/impl/psb_s_oacc_ell_scal.F90 | 58 ++ openacc/impl/psb_s_oacc_ell_scals.F90 | 39 + openacc/impl/psb_s_oacc_ell_vect_mv.F90 | 66 ++ openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 | 53 + openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 | 85 ++ openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_hll_csmm.F90 | 86 ++ openacc/impl/psb_s_oacc_hll_csmv.F90 | 84 ++ openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 | 86 ++ openacc/impl/psb_s_oacc_hll_mold.F90 | 34 + openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 | 25 + openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 | 29 + openacc/impl/psb_s_oacc_hll_scal.F90 | 62 ++ openacc/impl/psb_s_oacc_hll_scals.F90 | 40 + openacc/impl/psb_s_oacc_hll_vect_mv.F90 | 67 ++ openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 | 47 + openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 | 78 ++ openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_ell_csmm.F90 | 86 ++ openacc/impl/psb_z_oacc_ell_csmv.F90 | 82 ++ openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 | 85 ++ openacc/impl/psb_z_oacc_ell_mold.F90 | 34 + openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 | 25 + openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 | 28 + openacc/impl/psb_z_oacc_ell_scal.F90 | 58 ++ openacc/impl/psb_z_oacc_ell_scals.F90 | 39 + openacc/impl/psb_z_oacc_ell_vect_mv.F90 | 66 ++ openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 | 53 + openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 | 85 ++ openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_hll_csmm.F90 | 86 ++ openacc/impl/psb_z_oacc_hll_csmv.F90 | 84 ++ openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 | 86 ++ openacc/impl/psb_z_oacc_hll_mold.F90 | 34 + openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 | 25 + openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 | 29 + openacc/impl/psb_z_oacc_hll_scal.F90 | 62 ++ openacc/impl/psb_z_oacc_hll_scals.F90 | 40 + openacc/impl/psb_z_oacc_hll_vect_mv.F90 | 67 ++ openacc/psb_c_oacc_ell_mat_mod.F90 | 341 +++++++ openacc/psb_c_oacc_hll_mat_mod.F90 | 352 +++++++ openacc/psb_c_oacc_vect_mod.F90 | 33 +- openacc/psb_d_oacc_ell_mat_mod.F90 | 682 +++++++------ openacc/psb_d_oacc_hll_mat_mod.F90 | 702 +++++++------- openacc/psb_d_oacc_vect_mod.F90 | 33 +- openacc/psb_i_oacc_vect_mod.F90 | 916 +++++++++--------- openacc/psb_l_oacc_vect_mod.F90 | 507 ++++++++++ openacc/psb_oacc_mod.F90 | 9 + openacc/psb_s_oacc_ell_mat_mod.F90 | 341 +++++++ openacc/psb_s_oacc_hll_mat_mod.F90 | 352 +++++++ openacc/psb_s_oacc_vect_mod.F90 | 33 +- openacc/psb_z_oacc_ell_mat_mod.F90 | 341 +++++++ openacc/psb_z_oacc_hll_mat_mod.F90 | 352 +++++++ openacc/psb_z_oacc_vect_mod.F90 | 33 +- 137 files changed, 8195 insertions(+), 1404 deletions(-) create mode 100644 openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_csmm.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_csmv.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_mold.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_scal.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_scals.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_vect_mv.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_csmm.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_csmv.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_mold.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_scal.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_scals.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_vect_mv.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_csmm.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_csmv.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_mold.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_scal.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_scals.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_vect_mv.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_csmm.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_csmv.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_mold.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_scal.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_scals.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_vect_mv.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_csmm.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_csmv.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_mold.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_scal.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_scals.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_vect_mv.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_csmm.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_csmv.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_mold.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_scal.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_scals.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_vect_mv.F90 create mode 100644 openacc/psb_c_oacc_ell_mat_mod.F90 create mode 100644 openacc/psb_c_oacc_hll_mat_mod.F90 create mode 100644 openacc/psb_l_oacc_vect_mod.F90 create mode 100644 openacc/psb_s_oacc_ell_mat_mod.F90 create mode 100644 openacc/psb_s_oacc_hll_mat_mod.F90 create mode 100644 openacc/psb_z_oacc_ell_mat_mod.F90 create mode 100644 openacc/psb_z_oacc_hll_mat_mod.F90 diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 3fe001c8..38b740a7 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_c_serial_mod end subroutine psi_caxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_c_serial_mod complex(psb_spk_), intent (inout) :: z(:) complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_c_upd_xyz - end interface psi_upd_xyz + end subroutine psi_cabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index a08263df..1d65c5f6 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_d_serial_mod end subroutine psi_daxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_d_serial_mod real(psb_dpk_), intent (inout) :: z(:) real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_d_upd_xyz - end interface psi_upd_xyz + end subroutine psi_dabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index 1f1bebd7..6f4e8c06 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_e_serial_mod end subroutine psi_eaxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_e_serial_mod integer(psb_epk_), intent (inout) :: z(:) integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_e_upd_xyz - end interface psi_upd_xyz + end subroutine psi_eabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index 770d3256..ffa14059 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_i2_serial_mod end subroutine psi_i2axpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_i2_serial_mod integer(psb_i2pk_), intent (inout) :: z(:) integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_i2_upd_xyz - end interface psi_upd_xyz + end subroutine psi_i2abgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index 3583cccc..5661fdbf 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_m_serial_mod end subroutine psi_maxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_m_serial_mod integer(psb_mpk_), intent (inout) :: z(:) integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_m_upd_xyz - end interface psi_upd_xyz + end subroutine psi_mabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 3e0c6d91..5cc17d58 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_s_serial_mod end subroutine psi_saxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_s_serial_mod real(psb_spk_), intent (inout) :: z(:) real(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_s_upd_xyz - end interface psi_upd_xyz + end subroutine psi_sabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index a8ea734e..8a3f053d 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_z_serial_mod end subroutine psi_zaxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_z_serial_mod complex(psb_dpk_), intent (inout) :: z(:) complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_z_upd_xyz - end interface psi_upd_xyz + end subroutine psi_zabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index 591dec09..7f7f937c 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface - interface psb_upd_xyz - subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_abgdxyz + subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_c_vect_type, psb_cspmat_type @@ -154,8 +154,8 @@ module psb_c_psblas_mod complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_upd_xyz_vect - end interface psb_upd_xyz + end subroutine psb_cabgdxyz_vect + end interface psb_abgdxyz interface psb_geamax function psb_camax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index b200bc8a..12090956 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface - interface psb_upd_xyz - subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_abgdxyz + subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_d_vect_type, psb_dspmat_type @@ -154,8 +154,8 @@ module psb_d_psblas_mod real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_upd_xyz_vect - end interface psb_upd_xyz + end subroutine psb_dabgdxyz_vect + end interface psb_abgdxyz interface psb_geamax function psb_damax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index a60da025..7a7ce783 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface - interface psb_upd_xyz - subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_abgdxyz + subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_s_vect_type, psb_sspmat_type @@ -154,8 +154,8 @@ module psb_s_psblas_mod real(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_upd_xyz_vect - end interface psb_upd_xyz + end subroutine psb_sabgdxyz_vect + end interface psb_abgdxyz interface psb_geamax function psb_samax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index 241df2b9..bcfe9caa 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface - interface psb_upd_xyz - subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_abgdxyz + subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_z_vect_type, psb_zspmat_type @@ -154,8 +154,8 @@ module psb_z_psblas_mod complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_upd_xyz_vect - end interface psb_upd_xyz + end subroutine psb_zabgdxyz_vect + end interface psb_abgdxyz interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index 4dac86d6..41bab5ab 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => c_base_upd_xyz + procedure, pass(z) :: abgdxyz => c_base_abgdxyz procedure, pass(w) :: xyzw => c_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine c_base_axpby_a2 ! - ! UPD_XYZ is invoked via Z, hence the structure below. + ! ABGDXYZ is invoked via Z, hence the structure below. ! ! - !> Function base_upd_xyz + !> Function base_abgdxyz !! \memberof psb_c_base_vect_type - !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=czero)) call x%sync() if (y%is_dev().and.(beta/=czero)) call y%sync() if (z%is_dev().and.(delta/=czero)) call z%sync() - call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine c_base_upd_xyz + end subroutine c_base_abgdxyz subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 1e9510f2..865f9456 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => c_vect_upd_xyz + procedure, pass(z) :: abgdxyz => c_vect_abgdxyz procedure, pass(z) :: xyzw => c_vect_xyzw procedure, pass(y) :: mlt_v => c_vect_mlt_v @@ -774,7 +774,7 @@ contains end subroutine c_vect_axpby_a2 - subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -785,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine c_vect_upd_xyz + end subroutine c_vect_abgdxyz subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index f07b5aed..1ad1ffa5 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => d_base_upd_xyz + procedure, pass(z) :: abgdxyz => d_base_abgdxyz procedure, pass(w) :: xyzw => d_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine d_base_axpby_a2 ! - ! UPD_XYZ is invoked via Z, hence the structure below. + ! ABGDXYZ is invoked via Z, hence the structure below. ! ! - !> Function base_upd_xyz + !> Function base_abgdxyz !! \memberof psb_d_base_vect_type - !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=dzero)) call x%sync() if (y%is_dev().and.(beta/=dzero)) call y%sync() if (z%is_dev().and.(delta/=dzero)) call z%sync() - call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine d_base_upd_xyz + end subroutine d_base_abgdxyz subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index ae3062dd..55dd8230 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => d_vect_upd_xyz + procedure, pass(z) :: abgdxyz => d_vect_abgdxyz procedure, pass(z) :: xyzw => d_vect_xyzw procedure, pass(y) :: mlt_v => d_vect_mlt_v @@ -781,7 +781,7 @@ contains end subroutine d_vect_axpby_a2 - subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -792,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine d_vect_upd_xyz + end subroutine d_vect_abgdxyz subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 596cd634..26b82c31 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => s_base_upd_xyz + procedure, pass(z) :: abgdxyz => s_base_abgdxyz procedure, pass(w) :: xyzw => s_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine s_base_axpby_a2 ! - ! UPD_XYZ is invoked via Z, hence the structure below. + ! ABGDXYZ is invoked via Z, hence the structure below. ! ! - !> Function base_upd_xyz + !> Function base_abgdxyz !! \memberof psb_s_base_vect_type - !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=szero)) call x%sync() if (y%is_dev().and.(beta/=szero)) call y%sync() if (z%is_dev().and.(delta/=szero)) call z%sync() - call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine s_base_upd_xyz + end subroutine s_base_abgdxyz subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index cad4659c..a50b2a0a 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => s_vect_upd_xyz + procedure, pass(z) :: abgdxyz => s_vect_abgdxyz procedure, pass(z) :: xyzw => s_vect_xyzw procedure, pass(y) :: mlt_v => s_vect_mlt_v @@ -781,7 +781,7 @@ contains end subroutine s_vect_axpby_a2 - subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -792,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine s_vect_upd_xyz + end subroutine s_vect_abgdxyz subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 1bbdfba1..a3afc9c1 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => z_base_upd_xyz + procedure, pass(z) :: abgdxyz => z_base_abgdxyz procedure, pass(w) :: xyzw => z_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine z_base_axpby_a2 ! - ! UPD_XYZ is invoked via Z, hence the structure below. + ! ABGDXYZ is invoked via Z, hence the structure below. ! ! - !> Function base_upd_xyz + !> Function base_abgdxyz !! \memberof psb_z_base_vect_type - !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=zzero)) call x%sync() if (y%is_dev().and.(beta/=zzero)) call y%sync() if (z%is_dev().and.(delta/=zzero)) call z%sync() - call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine z_base_upd_xyz + end subroutine z_base_abgdxyz subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 48f2e947..21e0c546 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => z_vect_upd_xyz + procedure, pass(z) :: abgdxyz => z_vect_abgdxyz procedure, pass(z) :: xyzw => z_vect_xyzw procedure, pass(y) :: mlt_v => z_vect_mlt_v @@ -774,7 +774,7 @@ contains end subroutine z_vect_axpby_a2 - subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -785,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine z_vect_upd_xyz + end subroutine z_vect_abgdxyz subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index 5d80ef00..7c22bb06 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info) end subroutine psb_caddconst_vect -subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect + use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect implicit none type(psb_c_vect_type), intent (inout) :: x type(psb_c_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_c_upd_xyz_vect +end subroutine psb_cabgdxyz_vect diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 38ebe465..1de77647 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info) end subroutine psb_daddconst_vect -subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect + use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect implicit none type(psb_d_vect_type), intent (inout) :: x type(psb_d_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_d_upd_xyz_vect +end subroutine psb_dabgdxyz_vect diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 0055fdbe..1b1f24e6 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info) end subroutine psb_saddconst_vect -subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect + use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect implicit none type(psb_s_vect_type), intent (inout) :: x type(psb_s_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_s_upd_xyz_vect +end subroutine psb_sabgdxyz_vect diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index e93488e3..0f37a1f4 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info) end subroutine psb_zaddconst_vect -subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect + use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect implicit none type(psb_z_vect_type), intent (inout) :: x type(psb_z_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_z_upd_xyz_vect +end subroutine psb_zabgdxyz_vect diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index e3f1d9a3..e230a1e0 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine caxpbyv2 -subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='c_upd_xyz' + name='cabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_c_upd_xyz +end subroutine psi_cabgdxyz subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='c_xyzw' + name='cabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index d6a9a31d..bf1b2917 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine daxpbyv2 -subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='d_upd_xyz' + name='dabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_d_upd_xyz +end subroutine psi_dabgdxyz subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='d_xyzw' + name='dabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 37b11a94..911ab4ec 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine eaxpbyv2 -subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='e_upd_xyz' + name='eabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_e_upd_xyz +end subroutine psi_eabgdxyz subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='e_xyzw' + name='eabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index c20cd60b..fb42dfcd 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine i2axpbyv2 -subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2_upd_xyz' + name='i2abgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_i2_upd_xyz +end subroutine psi_i2abgdxyz subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2_xyzw' + name='i2abgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 55913a16..346fd897 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine maxpbyv2 -subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='m_upd_xyz' + name='mabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_m_upd_xyz +end subroutine psi_mabgdxyz subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='m_xyzw' + name='mabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index c3846c8e..52f86bcd 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine saxpbyv2 -subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='s_upd_xyz' + name='sabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_s_upd_xyz +end subroutine psi_sabgdxyz subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='s_xyzw' + name='sabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 763eae22..7e680273 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine zaxpbyv2 -subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='z_upd_xyz' + name='zabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_z_upd_xyz +end subroutine psi_zabgdxyz subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='z_xyzw' + name='zabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 45fafe0a..2c2a4f61 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod procedure, pass(x) :: dot_a => c_cuda_dot_a procedure, pass(y) :: axpby_v => c_cuda_axpby_v procedure, pass(y) :: axpby_a => c_cuda_axpby_a - procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz + procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz procedure, pass(y) :: mlt_v => c_cuda_mlt_v procedure, pass(y) :: mlt_a => c_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine c_cuda_axpby_v - subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx d_cuda_dot_a procedure, pass(y) :: axpby_v => d_cuda_axpby_v procedure, pass(y) :: axpby_a => d_cuda_axpby_a - procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz + procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz procedure, pass(y) :: mlt_v => d_cuda_mlt_v procedure, pass(y) :: mlt_a => d_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine d_cuda_axpby_v - subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx s_cuda_dot_a procedure, pass(y) :: axpby_v => s_cuda_axpby_v procedure, pass(y) :: axpby_a => s_cuda_axpby_a - procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz + procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz procedure, pass(y) :: mlt_v => s_cuda_mlt_v procedure, pass(y) :: mlt_a => s_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine s_cuda_axpby_v - subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx z_cuda_dot_a procedure, pass(y) :: axpby_v => z_cuda_axpby_v procedure, pass(y) :: axpby_a => z_cuda_axpby_a - procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz + procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz procedure, pass(y) :: mlt_v => z_cuda_mlt_v procedure, pass(y) :: mlt_a => z_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine z_cuda_axpby_v - subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx x) + type is (psb_c_vect_oacc) + select type(yy => y) + type is (psb_c_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nzt = a%nzt + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val, 1) + do j = 1, nzt + yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_c_oacc_ell_inner_vect_sv +end submodule psb_c_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_c_oacc_ell_mold.F90 b/openacc/impl/psb_c_oacc_ell_mold.F90 new file mode 100644 index 00000000..88331d1d --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_mold(a, b, info) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_c_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_c_oacc_ell_mold +end submodule psb_c_oacc_ell_mold_impl diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..7e703aa2 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_ell_mv_from_coo +end submodule psb_c_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..7d1f790d --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_c_oacc_ell_mv_from_fmt +end submodule psb_c_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..9f21c5df --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_ell_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_reallocate_nz +end submodule psb_c_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_c_oacc_ell_scal.F90 b/openacc/impl/psb_c_oacc_ell_scal.F90 new file mode 100644 index 00000000..b3ea90fb --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, m, nzt + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(j) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_scal +end submodule psb_c_oacc_ell_scal_impl diff --git a/openacc/impl/psb_c_oacc_ell_scals.F90 b/openacc/impl/psb_c_oacc_ell_scals.F90 new file mode 100644 index 00000000..f067f253 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_scals(d, a, info) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, nzt, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_scals +end submodule psb_c_oacc_ell_scals_impl diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..e65d00ba --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 @@ -0,0 +1,66 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nzt + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + complex(psb_spk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nzt + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_c_oacc_ell_vect_mv +end submodule psb_c_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..0840d0d6 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,53 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_c_hll_sparse_mat%allocate(m, n, nz_) + + hksz = a%hksz + nhacks = (m + hksz - 1) / hksz + + if (.not.allocated(a%val)) then + allocate(a%val(nz_ * m)) + allocate(a%ja(nz_ * m)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + allocate(a%hkoffs(nhacks)) + end if + + a%val = czero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + a%hkoffs = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_allocate_mnnz +end submodule psb_c_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..4c12cdf8 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,85 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + complex(psb_spk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 32 ! Assuming a default hack size of 32 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(nz_per_row * a%get_nrows())) + allocate(a%ja(nz_per_row * a%get_nrows())) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) + end if + a%val = czero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value + a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(j + (i - 1) * nz_per_row) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + ! Calculate hkoffs for HLL format + !$acc parallel loop present(a) + do i = 1, size(a%hkoffs) + a%hkoffs(i) = (i - 1) * hacksize + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_hll_cp_from_coo +end submodule psb_c_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..af6cc1d5 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_c_oacc_hll_cp_from_fmt +end submodule psb_c_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_hll_csmm.F90 b/openacc/impl/psb_c_oacc_hll_csmm.F90 new file mode 100644 index 00000000..6b0fc637 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmm_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_), intent(in) :: x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_hll_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop present(a, x, y) + do j = 1, nxy + do k = 1, nhacks + do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 + y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_csmm +end submodule psb_c_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_c_oacc_hll_csmv.F90 b/openacc/impl/psb_c_oacc_hll_csmv.F90 new file mode 100644 index 00000000..f32e37b7 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_csmv.F90 @@ -0,0 +1,84 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_), intent(in) :: x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, hksz, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_hll_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + ! This loop nest cannot be run with collapse, since + ! the inner loop extent varies. + !$acc parallel loop present(a, x, y) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_csmv +end submodule psb_c_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..a8d486b2 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_oacc) + select type(yy => y) + type is (psb_c_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + !$acc parallel loop present(a, xx, yy) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i+1) - 1 + yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j)) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_c_oacc_hll_inner_vect_sv +end submodule psb_c_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_c_oacc_hll_mold.F90 b/openacc/impl/psb_c_oacc_hll_mold.F90 new file mode 100644 index 00000000..f480f3ab --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_mold(a, b, info) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_c_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_c_oacc_hll_mold +end submodule psb_c_oacc_hll_mold_impl diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..dec52d40 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_hll_mv_from_coo +end submodule psb_c_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..f2a064cb --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_c_oacc_hll_mv_from_fmt +end submodule psb_c_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..52983d4e --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_hll_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_reallocate_nz +end submodule psb_c_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_c_oacc_hll_scal.F90 b/openacc/impl/psb_c_oacc_hll_scal.F90 new file mode 100644 index 00000000..527a0ec1 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_scal.F90 @@ -0,0 +1,62 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1 + a%val(j) = a%val(j) * d(k) + end do + end do + else if (side == 'R') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_scal +end submodule psb_c_oacc_hll_scal_impl diff --git a/openacc/impl/psb_c_oacc_hll_scals.F90 b/openacc/impl/psb_c_oacc_hll_scals.F90 new file mode 100644 index 00000000..00f24721 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_scals.F90 @@ -0,0 +1,40 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_scals(d, a, info) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + ! $ a parallel loop collapse(2) present(a) + !$acc parallel loop present(a) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_scals +end submodule psb_c_oacc_hll_scals_impl diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..3b74d11a --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 @@ -0,0 +1,67 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k + complex(psb_spk_) :: tmp + + info = 0 + + !$acc parallel loop present(val, ja, hkoffs, x, y) + do i = 1, nhacks + do k = 0, hksz - 1 + idx = hkoffs(i) + k + if (idx <= hkoffs(i + 1) - 1) then + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz + if (ja(j) > 0) then + tmp = tmp + val(j) * x(ja(j)) + end if + end do + y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) + end if + end do + end do + end subroutine inner_spmv + + end subroutine psb_c_oacc_hll_vect_mv +end submodule psb_c_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 index 4923e12c..b46c5454 100644 --- a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 +++ b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 @@ -29,7 +29,7 @@ contains allocate(a%idiag(m)) end if - a%val = 0.0_psb_dpk_ + a%val = dzero a%ja = -1 a%irn = 0 a%idiag = 0 diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 index 4e8402e7..c13d1edd 100644 --- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 @@ -26,7 +26,7 @@ contains allocate(a%irn(a%get_nrows())) allocate(a%idiag(a%get_nrows())) end if - a%val = 0.0_psb_dpk_ + a%val = dzero a%ja = -1 a%irn = 0 a%idiag = 0 diff --git a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 index 909ee90b..47a6933b 100644 --- a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 +++ b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 @@ -34,7 +34,7 @@ contains allocate(a%hkoffs(nhacks)) end if - a%val = 0.0_psb_dpk_ + a%val = dzero a%ja = -1 a%irn = 0 a%idiag = 0 diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 index 4a258c74..18bd768b 100644 --- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 @@ -27,7 +27,7 @@ contains allocate(a%idiag(a%get_nrows())) allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) end if - a%val = 0.0_psb_dpk_ + a%val = dzero a%ja = -1 a%irn = 0 a%idiag = 0 diff --git a/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 new file mode 100644 index 00000000..38c19b78 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 @@ -0,0 +1,47 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_s_ell_sparse_mat%allocate(m, n, nz_) + + if (.not.allocated(a%val)) then + allocate(a%val(m, nz_)) + allocate(a%ja(m, nz_)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + end if + + a%val = szero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_allocate_mnnz +end submodule psb_s_oacc_ell_allocate_mnnz_impl diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 new file mode 100644 index 00000000..9aaaff73 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 @@ -0,0 +1,78 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_cp_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + real(psb_spk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 1 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(a%get_nrows(), nz_per_row)) + allocate(a%ja(a%get_nrows(), nz_per_row)) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + end if + a%val = szero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row, row_counts(row) + 1) = value + a%ja(row, row_counts(row) + 1) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(i, j) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_ell_cp_from_coo +end submodule psb_s_oacc_ell_cp_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 new file mode 100644 index 00000000..d4c1a233 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_cp_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_s_oacc_ell_cp_from_fmt +end submodule psb_s_oacc_ell_cp_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_ell_csmm.F90 b/openacc/impl/psb_s_oacc_ell_csmm.F90 new file mode 100644 index 00000000..63219384 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmm_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_ell_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = 1, nzt + y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_csmm +end submodule psb_s_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_s_oacc_ell_csmv.F90 b/openacc/impl/psb_s_oacc_ell_csmv.F90 new file mode 100644 index 00000000..d4aaa9d4 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_csmv.F90 @@ -0,0 +1,82 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_ell_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nzt = a%nzt + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, m + do j = 1, nzt + y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_csmv +end submodule psb_s_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 new file mode 100644 index 00000000..ba42af12 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 @@ -0,0 +1,85 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_ell_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nzt + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_oacc) + select type(yy => y) + type is (psb_s_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nzt = a%nzt + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val, 1) + do j = 1, nzt + yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_s_oacc_ell_inner_vect_sv +end submodule psb_s_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_s_oacc_ell_mold.F90 b/openacc/impl/psb_s_oacc_ell_mold.F90 new file mode 100644 index 00000000..92f18f25 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_mold(a, b, info) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_s_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_s_oacc_ell_mold +end submodule psb_s_oacc_ell_mold_impl diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..d6bbec13 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_ell_mv_from_coo +end submodule psb_s_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..ebb82901 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_s_oacc_ell_mv_from_fmt +end submodule psb_s_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..373c2b67 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_ell_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_reallocate_nz +end submodule psb_s_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_s_oacc_ell_scal.F90 b/openacc/impl/psb_s_oacc_ell_scal.F90 new file mode 100644 index 00000000..180d8f9a --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, m, nzt + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(j) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_scal +end submodule psb_s_oacc_ell_scal_impl diff --git a/openacc/impl/psb_s_oacc_ell_scals.F90 b/openacc/impl/psb_s_oacc_ell_scals.F90 new file mode 100644 index 00000000..c1c305af --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_scals(d, a, info) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, nzt, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_scals +end submodule psb_s_oacc_ell_scals_impl diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..f48ba041 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 @@ -0,0 +1,66 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nzt + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_spk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nzt + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_s_oacc_ell_vect_mv +end submodule psb_s_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..c67ea621 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,53 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_s_hll_sparse_mat%allocate(m, n, nz_) + + hksz = a%hksz + nhacks = (m + hksz - 1) / hksz + + if (.not.allocated(a%val)) then + allocate(a%val(nz_ * m)) + allocate(a%ja(nz_ * m)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + allocate(a%hkoffs(nhacks)) + end if + + a%val = szero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + a%hkoffs = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_allocate_mnnz +end submodule psb_s_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..34a0b5d5 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,85 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + real(psb_spk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 32 ! Assuming a default hack size of 32 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(nz_per_row * a%get_nrows())) + allocate(a%ja(nz_per_row * a%get_nrows())) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) + end if + a%val = szero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value + a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(j + (i - 1) * nz_per_row) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + ! Calculate hkoffs for HLL format + !$acc parallel loop present(a) + do i = 1, size(a%hkoffs) + a%hkoffs(i) = (i - 1) * hacksize + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_hll_cp_from_coo +end submodule psb_s_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..4d023f8b --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_s_oacc_hll_cp_from_fmt +end submodule psb_s_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_hll_csmm.F90 b/openacc/impl/psb_s_oacc_hll_csmm.F90 new file mode 100644 index 00000000..803071eb --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmm_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_hll_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop present(a, x, y) + do j = 1, nxy + do k = 1, nhacks + do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 + y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_csmm +end submodule psb_s_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_s_oacc_hll_csmv.F90 b/openacc/impl/psb_s_oacc_hll_csmv.F90 new file mode 100644 index 00000000..b3c0cae8 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_csmv.F90 @@ -0,0 +1,84 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, hksz, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_hll_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + ! This loop nest cannot be run with collapse, since + ! the inner loop extent varies. + !$acc parallel loop present(a, x, y) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_csmv +end submodule psb_s_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..900b8982 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_oacc) + select type(yy => y) + type is (psb_s_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + !$acc parallel loop present(a, xx, yy) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i+1) - 1 + yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j)) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_s_oacc_hll_inner_vect_sv +end submodule psb_s_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_s_oacc_hll_mold.F90 b/openacc/impl/psb_s_oacc_hll_mold.F90 new file mode 100644 index 00000000..1e43b65b --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_mold(a, b, info) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_s_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_s_oacc_hll_mold +end submodule psb_s_oacc_hll_mold_impl diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..08b553b7 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_hll_mv_from_coo +end submodule psb_s_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..d5867289 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_s_oacc_hll_mv_from_fmt +end submodule psb_s_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..7768d1ed --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_hll_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_reallocate_nz +end submodule psb_s_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_s_oacc_hll_scal.F90 b/openacc/impl/psb_s_oacc_hll_scal.F90 new file mode 100644 index 00000000..ae36465e --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_scal.F90 @@ -0,0 +1,62 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1 + a%val(j) = a%val(j) * d(k) + end do + end do + else if (side == 'R') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_scal +end submodule psb_s_oacc_hll_scal_impl diff --git a/openacc/impl/psb_s_oacc_hll_scals.F90 b/openacc/impl/psb_s_oacc_hll_scals.F90 new file mode 100644 index 00000000..360ea84d --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_scals.F90 @@ -0,0 +1,40 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_scals(d, a, info) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + ! $ a parallel loop collapse(2) present(a) + !$acc parallel loop present(a) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_scals +end submodule psb_s_oacc_hll_scals_impl diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..9d9e9197 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 @@ -0,0 +1,67 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k + real(psb_spk_) :: tmp + + info = 0 + + !$acc parallel loop present(val, ja, hkoffs, x, y) + do i = 1, nhacks + do k = 0, hksz - 1 + idx = hkoffs(i) + k + if (idx <= hkoffs(i + 1) - 1) then + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz + if (ja(j) > 0) then + tmp = tmp + val(j) * x(ja(j)) + end if + end do + y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) + end if + end do + end do + end subroutine inner_spmv + + end subroutine psb_s_oacc_hll_vect_mv +end submodule psb_s_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 new file mode 100644 index 00000000..48a5e202 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 @@ -0,0 +1,47 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_z_ell_sparse_mat%allocate(m, n, nz_) + + if (.not.allocated(a%val)) then + allocate(a%val(m, nz_)) + allocate(a%ja(m, nz_)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + end if + + a%val = zzero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_allocate_mnnz +end submodule psb_z_oacc_ell_allocate_mnnz_impl diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 new file mode 100644 index 00000000..e4d3b731 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 @@ -0,0 +1,78 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_cp_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + complex(psb_dpk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 1 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(a%get_nrows(), nz_per_row)) + allocate(a%ja(a%get_nrows(), nz_per_row)) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + end if + a%val = zzero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row, row_counts(row) + 1) = value + a%ja(row, row_counts(row) + 1) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(i, j) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_ell_cp_from_coo +end submodule psb_z_oacc_ell_cp_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 new file mode 100644 index 00000000..98404ae2 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_cp_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_z_oacc_ell_cp_from_fmt +end submodule psb_z_oacc_ell_cp_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_ell_csmm.F90 b/openacc/impl/psb_z_oacc_ell_csmm.F90 new file mode 100644 index 00000000..406ca8c5 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmm_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_ell_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = 1, nzt + y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_csmm +end submodule psb_z_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_z_oacc_ell_csmv.F90 b/openacc/impl/psb_z_oacc_ell_csmv.F90 new file mode 100644 index 00000000..502dd4f1 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_csmv.F90 @@ -0,0 +1,82 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_ell_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nzt = a%nzt + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, m + do j = 1, nzt + y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_csmv +end submodule psb_z_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 new file mode 100644 index 00000000..f445a6b4 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 @@ -0,0 +1,85 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_ell_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nzt + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_z_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_oacc) + select type(yy => y) + type is (psb_z_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nzt = a%nzt + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val, 1) + do j = 1, nzt + yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_z_oacc_ell_inner_vect_sv +end submodule psb_z_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_z_oacc_ell_mold.F90 b/openacc/impl/psb_z_oacc_ell_mold.F90 new file mode 100644 index 00000000..fcc222de --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_mold(a, b, info) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_z_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_z_oacc_ell_mold +end submodule psb_z_oacc_ell_mold_impl diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..26388e5e --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_ell_mv_from_coo +end submodule psb_z_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..e0f75828 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_z_oacc_ell_mv_from_fmt +end submodule psb_z_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..8fd3ad77 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_ell_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_reallocate_nz +end submodule psb_z_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_z_oacc_ell_scal.F90 b/openacc/impl/psb_z_oacc_ell_scal.F90 new file mode 100644 index 00000000..e3d25ccb --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, m, nzt + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(j) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_scal +end submodule psb_z_oacc_ell_scal_impl diff --git a/openacc/impl/psb_z_oacc_ell_scals.F90 b/openacc/impl/psb_z_oacc_ell_scals.F90 new file mode 100644 index 00000000..c382627a --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_scals(d, a, info) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, nzt, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_scals +end submodule psb_z_oacc_ell_scals_impl diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..ecb61adf --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 @@ -0,0 +1,66 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nzt + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + complex(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nzt + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_z_oacc_ell_vect_mv +end submodule psb_z_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..c398d259 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,53 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_z_hll_sparse_mat%allocate(m, n, nz_) + + hksz = a%hksz + nhacks = (m + hksz - 1) / hksz + + if (.not.allocated(a%val)) then + allocate(a%val(nz_ * m)) + allocate(a%ja(nz_ * m)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + allocate(a%hkoffs(nhacks)) + end if + + a%val = zzero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + a%hkoffs = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_allocate_mnnz +end submodule psb_z_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..62be2252 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,85 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + complex(psb_dpk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 32 ! Assuming a default hack size of 32 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(nz_per_row * a%get_nrows())) + allocate(a%ja(nz_per_row * a%get_nrows())) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) + end if + a%val = zzero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value + a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(j + (i - 1) * nz_per_row) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + ! Calculate hkoffs for HLL format + !$acc parallel loop present(a) + do i = 1, size(a%hkoffs) + a%hkoffs(i) = (i - 1) * hacksize + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_hll_cp_from_coo +end submodule psb_z_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..f267e1c6 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_z_oacc_hll_cp_from_fmt +end submodule psb_z_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_hll_csmm.F90 b/openacc/impl/psb_z_oacc_hll_csmm.F90 new file mode 100644 index 00000000..3cfe5b32 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmm_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_hll_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop present(a, x, y) + do j = 1, nxy + do k = 1, nhacks + do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 + y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_csmm +end submodule psb_z_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_z_oacc_hll_csmv.F90 b/openacc/impl/psb_z_oacc_hll_csmv.F90 new file mode 100644 index 00000000..923bc061 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_csmv.F90 @@ -0,0 +1,84 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, hksz, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_hll_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + ! This loop nest cannot be run with collapse, since + ! the inner loop extent varies. + !$acc parallel loop present(a, x, y) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_csmv +end submodule psb_z_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..1d068542 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_oacc) + select type(yy => y) + type is (psb_z_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + !$acc parallel loop present(a, xx, yy) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i+1) - 1 + yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j)) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_z_oacc_hll_inner_vect_sv +end submodule psb_z_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_z_oacc_hll_mold.F90 b/openacc/impl/psb_z_oacc_hll_mold.F90 new file mode 100644 index 00000000..f614ad89 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_mold(a, b, info) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_z_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_z_oacc_hll_mold +end submodule psb_z_oacc_hll_mold_impl diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..2ff574d3 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_hll_mv_from_coo +end submodule psb_z_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..5fa00e38 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_z_oacc_hll_mv_from_fmt +end submodule psb_z_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..5b49efe5 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_hll_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_reallocate_nz +end submodule psb_z_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_z_oacc_hll_scal.F90 b/openacc/impl/psb_z_oacc_hll_scal.F90 new file mode 100644 index 00000000..a2f9aee7 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_scal.F90 @@ -0,0 +1,62 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1 + a%val(j) = a%val(j) * d(k) + end do + end do + else if (side == 'R') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_scal +end submodule psb_z_oacc_hll_scal_impl diff --git a/openacc/impl/psb_z_oacc_hll_scals.F90 b/openacc/impl/psb_z_oacc_hll_scals.F90 new file mode 100644 index 00000000..888115cd --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_scals.F90 @@ -0,0 +1,40 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_scals(d, a, info) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + ! $ a parallel loop collapse(2) present(a) + !$acc parallel loop present(a) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_scals +end submodule psb_z_oacc_hll_scals_impl diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..89d970c0 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 @@ -0,0 +1,67 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k + complex(psb_dpk_) :: tmp + + info = 0 + + !$acc parallel loop present(val, ja, hkoffs, x, y) + do i = 1, nhacks + do k = 0, hksz - 1 + idx = hkoffs(i) + k + if (idx <= hkoffs(i + 1) - 1) then + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz + if (ja(j) > 0) then + tmp = tmp + val(j) * x(ja(j)) + end if + end do + y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) + end if + end do + end do + end subroutine inner_spmv + + end subroutine psb_z_oacc_hll_vect_mv +end submodule psb_z_oacc_hll_vect_mv_impl diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 new file mode 100644 index 00000000..102d41c5 --- /dev/null +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -0,0 +1,341 @@ +module psb_c_oacc_ell_mat_mod + use iso_c_binding + use psb_c_mat_mod + use psb_c_ell_mat_mod + use psb_c_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_ell_sparse_mat) :: psb_c_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => c_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => c_oacc_ell_sizeof + procedure, pass(a) :: is_host => c_oacc_ell_is_host + procedure, pass(a) :: is_sync => c_oacc_ell_is_sync + procedure, pass(a) :: is_dev => c_oacc_ell_is_dev + procedure, pass(a) :: set_host => c_oacc_ell_set_host + procedure, pass(a) :: set_sync => c_oacc_ell_set_sync + procedure, pass(a) :: set_dev => c_oacc_ell_set_dev + procedure, pass(a) :: sync_space => c_oacc_ell_sync_space + procedure, pass(a) :: sync => c_oacc_ell_sync + procedure, pass(a) :: free => c_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_c_oacc_ell_csmv + procedure, pass(a) :: scals => psb_c_oacc_ell_scals + procedure, pass(a) :: scalv => psb_c_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_c_oacc_ell_mold + + end type psb_c_oacc_ell_sparse_mat + + interface + module subroutine psb_c_oacc_ell_mold(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_mold + end interface + + interface + module subroutine psb_c_oacc_ell_cp_from_fmt(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_c_oacc_ell_mv_from_coo(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_c_oacc_ell_mv_from_fmt(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_c_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_c_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_csmm + end interface + + interface + module subroutine psb_c_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_csmv + end interface + + interface + module subroutine psb_c_oacc_ell_scals(d, a, info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_scals + end interface + + interface + module subroutine psb_c_oacc_ell_scal(d,a,info,side) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_c_oacc_ell_scal + end interface + + interface + module subroutine psb_c_oacc_ell_reallocate_nz(nz,a) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_c_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_c_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_c_oacc_ell_cp_from_coo(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_cp_from_coo + end interface + +contains + + subroutine c_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + + call a%psb_c_ell_sparse_mat%free() + + return + end subroutine c_oacc_ell_free + + + function c_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function c_oacc_ell_sizeof + + subroutine c_oacc_ell_sync_space(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call c_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + + contains + subroutine c_oacc_create_dev(v) + implicit none + complex(psb_spk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine c_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + end subroutine c_oacc_ell_sync_space + + + + function c_oacc_ell_is_host(a) result(res) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_oacc_ell_is_host + + function c_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_oacc_ell_is_sync + + function c_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_oacc_ell_is_dev + + subroutine c_oacc_ell_set_host(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_oacc_ell_set_host + + subroutine c_oacc_ell_set_sync(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_oacc_ell_set_sync + + subroutine c_oacc_ell_set_dev(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_oacc_ell_set_dev + + function c_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL_oacc' + end function c_oacc_ell_get_fmt + + subroutine c_oacc_ell_sync(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_c_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call c_oacc_ell_to_host(a%val) + call i_oacc_ell_to_host(a%ja) + call i_oacc_ell_to_host_scalar(a%irn) + call i_oacc_ell_to_host_scalar(a%idiag) + else if (a%is_host()) then + call c_oacc_ell_to_dev(a%val) + call i_oacc_ell_to_dev(a%ja) + call i_oacc_ell_to_dev_scalar(a%irn) + call i_oacc_ell_to_dev_scalar(a%idiag) + end if + call tmpa%set_sync() + end subroutine c_oacc_ell_sync + + subroutine c_oacc_ell_to_host(v) + implicit none + complex(psb_spk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine c_oacc_ell_to_host + + subroutine c_oacc_ell_to_host_scalar(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine c_oacc_ell_to_host_scalar + + subroutine i_oacc_ell_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev + + subroutine i_oacc_ell_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev_scalar + + subroutine i_oacc_ell_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host + + subroutine i_oacc_ell_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host_scalar +end module psb_c_oacc_ell_mat_mod diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..faad0a1b --- /dev/null +++ b/openacc/psb_c_oacc_hll_mat_mod.F90 @@ -0,0 +1,352 @@ +module psb_c_oacc_hll_mat_mod + use iso_c_binding + use psb_c_mat_mod + use psb_c_hll_mat_mod + use psb_c_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_hll_sparse_mat) :: psb_c_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => c_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => c_oacc_hll_sizeof + procedure, pass(a) :: is_host => c_oacc_hll_is_host + procedure, pass(a) :: is_sync => c_oacc_hll_is_sync + procedure, pass(a) :: is_dev => c_oacc_hll_is_dev + procedure, pass(a) :: set_host => c_oacc_hll_set_host + procedure, pass(a) :: set_sync => c_oacc_hll_set_sync + procedure, pass(a) :: set_dev => c_oacc_hll_set_dev + procedure, pass(a) :: sync_space => c_oacc_hll_sync_space + procedure, pass(a) :: sync => c_oacc_hll_sync + procedure, pass(a) :: free => c_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_c_oacc_hll_csmv + procedure, pass(a) :: scals => psb_c_oacc_hll_scals + procedure, pass(a) :: scalv => psb_c_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_c_oacc_hll_mold + + end type psb_c_oacc_hll_sparse_mat + + interface + module subroutine psb_c_oacc_hll_mold(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_mold + end interface + + interface + module subroutine psb_c_oacc_hll_cp_from_fmt(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_c_oacc_hll_mv_from_coo(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_c_oacc_hll_mv_from_fmt(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_csmm + end interface + + interface + module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_csmv + end interface + + interface + module subroutine psb_c_oacc_hll_scals(d, a, info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_scals + end interface + + interface + module subroutine psb_c_oacc_hll_scal(d,a,info,side) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_c_oacc_hll_scal + end interface + + interface + module subroutine psb_c_oacc_hll_reallocate_nz(nz,a) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_c_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_c_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_c_oacc_hll_cp_from_coo(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_cp_from_coo + end interface + +contains + + subroutine c_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_c_hll_sparse_mat%free() + + return + end subroutine c_oacc_hll_free + + function c_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function c_oacc_hll_sizeof + + + + function c_oacc_hll_is_host(a) result(res) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_oacc_hll_is_host + + function c_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_oacc_hll_is_sync + + function c_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_oacc_hll_is_dev + + subroutine c_oacc_hll_set_host(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_oacc_hll_set_host + + subroutine c_oacc_hll_set_sync(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_oacc_hll_set_sync + + subroutine c_oacc_hll_set_dev(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_oacc_hll_set_dev + + function c_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function c_oacc_hll_get_fmt + + subroutine c_oacc_hll_sync_space(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call c_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine c_oacc_create_dev(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine c_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine c_oacc_hll_sync_space + + + subroutine c_oacc_hll_sync(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_c_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call c_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call c_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine c_oacc_hll_sync + + subroutine c_oacc_hll_to_host(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine c_oacc_hll_to_host + + subroutine c_oacc_hll_to_dev(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine c_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_c_oacc_hll_mat_mod diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index 6f9545ea..9225f159 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -42,6 +42,7 @@ module psb_c_oacc_vect_mod procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf procedure, pass(x) :: get_size => c_oacc_get_size + procedure, pass(x) :: dot_v => c_oacc_vect_dot procedure, pass(x) :: dot_a => c_oacc_dot_a procedure, pass(y) :: axpby_v => c_oacc_axpby_v @@ -70,7 +71,6 @@ module psb_c_oacc_vect_mod end subroutine c_oacc_mlt_v end interface - interface subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import @@ -83,7 +83,7 @@ module psb_c_oacc_vect_mod character(len=1), intent(in), optional :: conjgx, conjgy end subroutine c_oacc_mlt_v_2 end interface - + contains subroutine c_oacc_absval1(x) @@ -432,7 +432,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() if (y%is_host()) call y%sync_space() !$acc parallel loop @@ -459,7 +459,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'c_oacc_sctb_x') return @@ -475,8 +475,6 @@ contains call y%set_dev() end subroutine c_oacc_sctb_x - - subroutine c_oacc_sctb(n, idx, x, beta, y) use psb_base_mod implicit none @@ -498,7 +496,6 @@ contains call y%set_host() end subroutine c_oacc_sctb - subroutine c_oacc_gthzbuf(i, n, idx, x) use psb_base_mod implicit none @@ -515,7 +512,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'c_oacc_gthzbuf') return @@ -542,7 +539,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'c_oacc_gthzv_x') return @@ -577,7 +574,7 @@ contains select type(vval => val) type is (psb_c_vect_oacc) if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) + if (virl%is_host()) call virl%sync_space() if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n @@ -591,7 +588,7 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) + if (virl%is_dev()) call virl%sync_space() end select select type(vval => val) type is (psb_c_vect_oacc) @@ -607,8 +604,6 @@ contains end subroutine c_oacc_ins_v - - subroutine c_oacc_ins_a(n, irl, val, dupl, x, info) use psi_serial_mod implicit none @@ -628,8 +623,6 @@ contains end subroutine c_oacc_ins_a - - subroutine c_oacc_bld_mn(x, n) use psb_base_mod implicit none @@ -668,7 +661,6 @@ contains end subroutine c_oacc_bld_x - subroutine c_oacc_asb_m(n, x, info) use psb_base_mod implicit none @@ -696,8 +688,6 @@ contains end if end subroutine c_oacc_asb_m - - subroutine c_oacc_set_scal(x, val, first, last) class(psb_c_vect_oacc), intent(inout) :: x complex(psb_spk_), intent(in) :: val @@ -718,8 +708,6 @@ contains call x%set_dev() end subroutine c_oacc_set_scal - - subroutine c_oacc_zero(x) use psi_serial_mod implicit none @@ -743,6 +731,7 @@ contains end function c_oacc_get_fmt + function c_oacc_vect_dot(n, x, y) result(res) implicit none class(psb_c_vect_oacc), intent(inout) :: x @@ -776,9 +765,6 @@ contains end function c_oacc_vect_dot - - - function c_oacc_dot_a(n, x, y) result(res) implicit none class(psb_c_vect_oacc), intent(inout) :: x @@ -910,7 +896,6 @@ contains end if end subroutine c_oacc_vect_all - subroutine c_oacc_vect_free(x, info) implicit none class(psb_c_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 8c5946ba..042c0ff3 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -1,343 +1,341 @@ module psb_d_oacc_ell_mat_mod - use iso_c_binding - use psb_d_mat_mod - use psb_d_ell_mat_mod - use psb_d_oacc_vect_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat - integer(psb_ipk_) :: devstate = is_host - contains - procedure, nopass :: get_fmt => d_oacc_ell_get_fmt - procedure, pass(a) :: sizeof => d_oacc_ell_sizeof - procedure, pass(a) :: is_host => d_oacc_ell_is_host - procedure, pass(a) :: is_sync => d_oacc_ell_is_sync - procedure, pass(a) :: is_dev => d_oacc_ell_is_dev - procedure, pass(a) :: set_host => d_oacc_ell_set_host - procedure, pass(a) :: set_sync => d_oacc_ell_set_sync - procedure, pass(a) :: set_dev => d_oacc_ell_set_dev - procedure, pass(a) :: sync_space => d_oacc_ell_sync_space - procedure, pass(a) :: sync => d_oacc_ell_sync - procedure, pass(a) :: free => d_oacc_ell_free - procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv - procedure, pass(a) :: scals => psb_d_oacc_ell_scals - procedure, pass(a) :: scalv => psb_d_oacc_ell_scal - procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt - procedure, pass(a) :: mold => psb_d_oacc_ell_mold - - end type psb_d_oacc_ell_sparse_mat - - interface - module subroutine psb_d_oacc_ell_mold(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_mold - end interface - - interface - module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_cp_from_fmt - end interface - - interface - module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_mv_from_coo - end interface - - interface - module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_mv_from_fmt - end interface - - interface - module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x, y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_vect_mv - end interface - - interface - module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x,y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_inner_vect_sv - end interface - - interface - module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_csmm - end interface - - interface - module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_csmv - end interface - - interface - module subroutine psb_d_oacc_ell_scals(d, a, info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_scals - end interface - - interface - module subroutine psb_d_oacc_ell_scal(d,a,info,side) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: side - end subroutine psb_d_oacc_ell_scal - end interface - - interface - module subroutine psb_d_oacc_ell_reallocate_nz(nz,a) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: nz - end subroutine psb_d_oacc_ell_reallocate_nz - end interface - - interface - module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: m,n - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_oacc_ell_allocate_mnnz - end interface - - interface - module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_cp_from_coo - end interface - - contains - - subroutine d_oacc_ell_free(a) - use psb_base_mod - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - integer(psb_ipk_) :: info - - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - - call a%psb_d_ell_sparse_mat%free() - - return - end subroutine d_oacc_ell_free - - - function d_oacc_ell_sizeof(a) result(res) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - - if (a%is_dev()) call a%sync() - - res = 8 - res = res + psb_sizeof_dp * size(a%val) - res = res + psb_sizeof_ip * size(a%ja) - res = res + psb_sizeof_ip * size(a%irn) - res = res + psb_sizeof_ip * size(a%idiag) - - end function d_oacc_ell_sizeof - - subroutine d_oacc_ell_sync_space(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - end subroutine d_oacc_ell_sync_space - - - - function d_oacc_ell_is_host(a) result(res) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function d_oacc_ell_is_host - - function d_oacc_ell_is_sync(a) result(res) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function d_oacc_ell_is_sync - - function d_oacc_ell_is_dev(a) result(res) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function d_oacc_ell_is_dev - - subroutine d_oacc_ell_set_host(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine d_oacc_ell_set_host - - subroutine d_oacc_ell_set_sync(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine d_oacc_ell_set_sync - - subroutine d_oacc_ell_set_dev(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine d_oacc_ell_set_dev - - function d_oacc_ell_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'ELL_oacc' - end function d_oacc_ell_get_fmt - - subroutine d_oacc_ell_sync(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), target, intent(in) :: a - class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (a%is_dev()) then - call d_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) - else if (a%is_host()) then - call d_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) - end if - call tmpa%set_sync() - end subroutine d_oacc_ell_sync - - subroutine d_oacc_ell_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine d_oacc_ell_to_host - - subroutine d_oacc_ell_to_host_scalar(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar - - -end module psb_d_oacc_ell_mat_mod \ No newline at end of file + use iso_c_binding + use psb_d_mat_mod + use psb_d_ell_mat_mod + use psb_d_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => d_oacc_ell_sizeof + procedure, pass(a) :: is_host => d_oacc_ell_is_host + procedure, pass(a) :: is_sync => d_oacc_ell_is_sync + procedure, pass(a) :: is_dev => d_oacc_ell_is_dev + procedure, pass(a) :: set_host => d_oacc_ell_set_host + procedure, pass(a) :: set_sync => d_oacc_ell_set_sync + procedure, pass(a) :: set_dev => d_oacc_ell_set_dev + procedure, pass(a) :: sync_space => d_oacc_ell_sync_space + procedure, pass(a) :: sync => d_oacc_ell_sync + procedure, pass(a) :: free => d_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv + procedure, pass(a) :: scals => psb_d_oacc_ell_scals + procedure, pass(a) :: scalv => psb_d_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_ell_mold + + end type psb_d_oacc_ell_sparse_mat + + interface + module subroutine psb_d_oacc_ell_mold(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mold + end interface + + interface + module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_csmm + end interface + + interface + module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_csmv + end interface + + interface + module subroutine psb_d_oacc_ell_scals(d, a, info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_scals + end interface + + interface + module subroutine psb_d_oacc_ell_scal(d,a,info,side) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_ell_scal + end interface + + interface + module subroutine psb_d_oacc_ell_reallocate_nz(nz,a) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_cp_from_coo + end interface + +contains + + subroutine d_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + + call a%psb_d_ell_sparse_mat%free() + + return + end subroutine d_oacc_ell_free + + + function d_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function d_oacc_ell_sizeof + + subroutine d_oacc_ell_sync_space(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + end subroutine d_oacc_ell_sync_space + + + + function d_oacc_ell_is_host(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_ell_is_host + + function d_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_ell_is_sync + + function d_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_ell_is_dev + + subroutine d_oacc_ell_set_host(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_ell_set_host + + subroutine d_oacc_ell_set_sync(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_ell_set_sync + + subroutine d_oacc_ell_set_dev(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_ell_set_dev + + function d_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL_oacc' + end function d_oacc_ell_get_fmt + + subroutine d_oacc_ell_sync(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_ell_to_host(a%val) + call i_oacc_ell_to_host(a%ja) + call i_oacc_ell_to_host_scalar(a%irn) + call i_oacc_ell_to_host_scalar(a%idiag) + else if (a%is_host()) then + call d_oacc_ell_to_dev(a%val) + call i_oacc_ell_to_dev(a%ja) + call i_oacc_ell_to_dev_scalar(a%irn) + call i_oacc_ell_to_dev_scalar(a%idiag) + end if + call tmpa%set_sync() + end subroutine d_oacc_ell_sync + + subroutine d_oacc_ell_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine d_oacc_ell_to_host + + subroutine d_oacc_ell_to_host_scalar(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_ell_to_host_scalar + + subroutine i_oacc_ell_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev + + subroutine i_oacc_ell_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev_scalar + + subroutine i_oacc_ell_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host + + subroutine i_oacc_ell_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host_scalar +end module psb_d_oacc_ell_mat_mod diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 index 530af94a..b1c36a65 100644 --- a/openacc/psb_d_oacc_hll_mat_mod.F90 +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -1,352 +1,352 @@ module psb_d_oacc_hll_mat_mod - use iso_c_binding - use psb_d_mat_mod - use psb_d_hll_mat_mod - use psb_d_oacc_vect_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat - integer(psb_ipk_) :: devstate = is_host - contains - procedure, nopass :: get_fmt => d_oacc_hll_get_fmt - procedure, pass(a) :: sizeof => d_oacc_hll_sizeof - procedure, pass(a) :: is_host => d_oacc_hll_is_host - procedure, pass(a) :: is_sync => d_oacc_hll_is_sync - procedure, pass(a) :: is_dev => d_oacc_hll_is_dev - procedure, pass(a) :: set_host => d_oacc_hll_set_host - procedure, pass(a) :: set_sync => d_oacc_hll_set_sync - procedure, pass(a) :: set_dev => d_oacc_hll_set_dev - procedure, pass(a) :: sync_space => d_oacc_hll_sync_space - procedure, pass(a) :: sync => d_oacc_hll_sync - procedure, pass(a) :: free => d_oacc_hll_free - procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv - procedure, pass(a) :: scals => psb_d_oacc_hll_scals - procedure, pass(a) :: scalv => psb_d_oacc_hll_scal - procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt - procedure, pass(a) :: mold => psb_d_oacc_hll_mold - - end type psb_d_oacc_hll_sparse_mat - - interface - module subroutine psb_d_oacc_hll_mold(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_mold - end interface - - interface - module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_cp_from_fmt - end interface - - interface - module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_mv_from_coo - end interface - - interface - module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_mv_from_fmt - end interface - - interface - module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x, y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_vect_mv - end interface - - interface - module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x,y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_inner_vect_sv - end interface - - interface - module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_csmm - end interface - - interface - module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_csmv - end interface - - interface - module subroutine psb_d_oacc_hll_scals(d, a, info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_scals - end interface - - interface - module subroutine psb_d_oacc_hll_scal(d,a,info,side) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: side - end subroutine psb_d_oacc_hll_scal - end interface - - interface - module subroutine psb_d_oacc_hll_reallocate_nz(nz,a) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: nz - end subroutine psb_d_oacc_hll_reallocate_nz - end interface - - interface - module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: m,n - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_oacc_hll_allocate_mnnz - end interface - - interface - module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_cp_from_coo - end interface - - contains - - subroutine d_oacc_hll_free(a) - use psb_base_mod - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - integer(psb_ipk_) :: info - - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if - - call a%psb_d_hll_sparse_mat%free() - - return - end subroutine d_oacc_hll_free - - function d_oacc_hll_sizeof(a) result(res) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - - if (a%is_dev()) call a%sync() - - res = 8 - res = res + psb_sizeof_dp * size(a%val) - res = res + psb_sizeof_ip * size(a%ja) - res = res + psb_sizeof_ip * size(a%irn) - res = res + psb_sizeof_ip * size(a%idiag) - res = res + psb_sizeof_ip * size(a%hkoffs) - end function d_oacc_hll_sizeof - - - - function d_oacc_hll_is_host(a) result(res) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function d_oacc_hll_is_host - - function d_oacc_hll_is_sync(a) result(res) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function d_oacc_hll_is_sync - - function d_oacc_hll_is_dev(a) result(res) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function d_oacc_hll_is_dev - - subroutine d_oacc_hll_set_host(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine d_oacc_hll_set_host - - subroutine d_oacc_hll_set_sync(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine d_oacc_hll_set_sync - - subroutine d_oacc_hll_set_dev(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine d_oacc_hll_set_dev - - function d_oacc_hll_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'HLL_oacc' - end function d_oacc_hll_get_fmt - - subroutine d_oacc_hll_sync_space(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - - end subroutine d_oacc_hll_sync_space - - - subroutine d_oacc_hll_sync(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a - class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (a%is_dev()) then - call d_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) - else if (a%is_host()) then - call d_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) - end if - call tmpa%set_sync() - end subroutine d_oacc_hll_sync - - subroutine d_oacc_hll_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_hll_to_host - - subroutine d_oacc_hll_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - -end module psb_d_oacc_hll_mat_mod \ No newline at end of file + use iso_c_binding + use psb_d_mat_mod + use psb_d_hll_mat_mod + use psb_d_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => d_oacc_hll_sizeof + procedure, pass(a) :: is_host => d_oacc_hll_is_host + procedure, pass(a) :: is_sync => d_oacc_hll_is_sync + procedure, pass(a) :: is_dev => d_oacc_hll_is_dev + procedure, pass(a) :: set_host => d_oacc_hll_set_host + procedure, pass(a) :: set_sync => d_oacc_hll_set_sync + procedure, pass(a) :: set_dev => d_oacc_hll_set_dev + procedure, pass(a) :: sync_space => d_oacc_hll_sync_space + procedure, pass(a) :: sync => d_oacc_hll_sync + procedure, pass(a) :: free => d_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv + procedure, pass(a) :: scals => psb_d_oacc_hll_scals + procedure, pass(a) :: scalv => psb_d_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_hll_mold + + end type psb_d_oacc_hll_sparse_mat + + interface + module subroutine psb_d_oacc_hll_mold(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mold + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_csmm + end interface + + interface + module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_csmv + end interface + + interface + module subroutine psb_d_oacc_hll_scals(d, a, info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_scals + end interface + + interface + module subroutine psb_d_oacc_hll_scal(d,a,info,side) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_hll_scal + end interface + + interface + module subroutine psb_d_oacc_hll_reallocate_nz(nz,a) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_coo + end interface + +contains + + subroutine d_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_d_hll_sparse_mat%free() + + return + end subroutine d_oacc_hll_free + + function d_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function d_oacc_hll_sizeof + + + + function d_oacc_hll_is_host(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_hll_is_host + + function d_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_hll_is_sync + + function d_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_hll_is_dev + + subroutine d_oacc_hll_set_host(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_hll_set_host + + subroutine d_oacc_hll_set_sync(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_hll_set_sync + + subroutine d_oacc_hll_set_dev(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_hll_set_dev + + function d_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function d_oacc_hll_get_fmt + + subroutine d_oacc_hll_sync_space(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine d_oacc_hll_sync_space + + + subroutine d_oacc_hll_sync(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call d_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine d_oacc_hll_sync + + subroutine d_oacc_hll_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_hll_to_host + + subroutine d_oacc_hll_to_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine d_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_d_oacc_hll_mat_mod diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 7d51766d..0dff0f27 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -42,6 +42,7 @@ module psb_d_oacc_vect_mod procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf procedure, pass(x) :: get_size => d_oacc_get_size + procedure, pass(x) :: dot_v => d_oacc_vect_dot procedure, pass(x) :: dot_a => d_oacc_dot_a procedure, pass(y) :: axpby_v => d_oacc_axpby_v @@ -70,7 +71,6 @@ module psb_d_oacc_vect_mod end subroutine d_oacc_mlt_v end interface - interface subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import @@ -83,7 +83,7 @@ module psb_d_oacc_vect_mod character(len=1), intent(in), optional :: conjgx, conjgy end subroutine d_oacc_mlt_v_2 end interface - + contains subroutine d_oacc_absval1(x) @@ -432,7 +432,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() if (y%is_host()) call y%sync_space() !$acc parallel loop @@ -459,7 +459,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'd_oacc_sctb_x') return @@ -475,8 +475,6 @@ contains call y%set_dev() end subroutine d_oacc_sctb_x - - subroutine d_oacc_sctb(n, idx, x, beta, y) use psb_base_mod implicit none @@ -498,7 +496,6 @@ contains call y%set_host() end subroutine d_oacc_sctb - subroutine d_oacc_gthzbuf(i, n, idx, x) use psb_base_mod implicit none @@ -515,7 +512,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'd_oacc_gthzbuf') return @@ -542,7 +539,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'd_oacc_gthzv_x') return @@ -577,7 +574,7 @@ contains select type(vval => val) type is (psb_d_vect_oacc) if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) + if (virl%is_host()) call virl%sync_space() if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n @@ -591,7 +588,7 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) + if (virl%is_dev()) call virl%sync_space() end select select type(vval => val) type is (psb_d_vect_oacc) @@ -607,8 +604,6 @@ contains end subroutine d_oacc_ins_v - - subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) use psi_serial_mod implicit none @@ -628,8 +623,6 @@ contains end subroutine d_oacc_ins_a - - subroutine d_oacc_bld_mn(x, n) use psb_base_mod implicit none @@ -668,7 +661,6 @@ contains end subroutine d_oacc_bld_x - subroutine d_oacc_asb_m(n, x, info) use psb_base_mod implicit none @@ -696,8 +688,6 @@ contains end if end subroutine d_oacc_asb_m - - subroutine d_oacc_set_scal(x, val, first, last) class(psb_d_vect_oacc), intent(inout) :: x real(psb_dpk_), intent(in) :: val @@ -718,8 +708,6 @@ contains call x%set_dev() end subroutine d_oacc_set_scal - - subroutine d_oacc_zero(x) use psi_serial_mod implicit none @@ -743,6 +731,7 @@ contains end function d_oacc_get_fmt + function d_oacc_vect_dot(n, x, y) result(res) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -776,9 +765,6 @@ contains end function d_oacc_vect_dot - - - function d_oacc_dot_a(n, x, y) result(res) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -910,7 +896,6 @@ contains end if end subroutine d_oacc_vect_all - subroutine d_oacc_vect_free(x, info) implicit none class(psb_d_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index 70fc325e..72e9ada2 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -1,455 +1,505 @@ module psb_i_oacc_vect_mod - use iso_c_binding - use psb_const_mod - use psb_error_mod - use psb_i_vect_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc - integer :: state = is_host - contains - procedure, pass(x) :: get_nrows => i_oacc_get_nrows - procedure, nopass :: get_fmt => i_oacc_get_fmt - - procedure, pass(x) :: all => i_oacc_all - procedure, pass(x) :: zero => i_oacc_zero - procedure, pass(x) :: asb_m => i_oacc_asb_m - procedure, pass(x) :: sync => i_oacc_sync - procedure, pass(x) :: sync_space => i_oacc_sync_space - procedure, pass(x) :: bld_x => i_oacc_bld_x - procedure, pass(x) :: bld_mn => i_oacc_bld_mn - procedure, pass(x) :: free => i_oacc_free - procedure, pass(x) :: ins_a => i_oacc_ins_a - procedure, pass(x) :: ins_v => i_oacc_ins_v - procedure, pass(x) :: is_host => i_oacc_is_host - procedure, pass(x) :: is_dev => i_oacc_is_dev - procedure, pass(x) :: is_sync => i_oacc_is_sync - procedure, pass(x) :: set_host => i_oacc_set_host - procedure, pass(x) :: set_dev => i_oacc_set_dev - procedure, pass(x) :: set_sync => i_oacc_set_sync - procedure, pass(x) :: set_scal => i_oacc_set_scal - procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x - procedure, pass(y) :: sctb => i_oacc_sctb - procedure, pass(y) :: sctb_x => i_oacc_sctb_x - procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf - procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf - - final :: i_oacc_vect_finalize - end type psb_i_vect_oacc - - public :: psb_i_vect_oacc_ - private :: constructor - interface psb_i_vect_oacc_ - module procedure constructor - end interface psb_i_vect_oacc_ + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => i_oacc_get_nrows + procedure, nopass :: get_fmt => i_oacc_get_fmt + + procedure, pass(x) :: all => i_oacc_vect_all + procedure, pass(x) :: zero => i_oacc_zero + procedure, pass(x) :: asb_m => i_oacc_asb_m + procedure, pass(x) :: sync => i_oacc_sync + procedure, pass(x) :: sync_space => i_oacc_sync_space + procedure, pass(x) :: bld_x => i_oacc_bld_x + procedure, pass(x) :: bld_mn => i_oacc_bld_mn + procedure, pass(x) :: free => i_oacc_vect_free + procedure, pass(x) :: ins_a => i_oacc_ins_a + procedure, pass(x) :: ins_v => i_oacc_ins_v + procedure, pass(x) :: is_host => i_oacc_is_host + procedure, pass(x) :: is_dev => i_oacc_is_dev + procedure, pass(x) :: is_sync => i_oacc_is_sync + procedure, pass(x) :: set_host => i_oacc_set_host + procedure, pass(x) :: set_dev => i_oacc_set_dev + procedure, pass(x) :: set_sync => i_oacc_set_sync + procedure, pass(x) :: set_scal => i_oacc_set_scal + + procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf + procedure, pass(y) :: sctb => i_oacc_sctb + procedure, pass(y) :: sctb_x => i_oacc_sctb_x + procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf + + procedure, pass(x) :: get_size => i_oacc_get_size + + + end type psb_i_vect_oacc + contains - function constructor(x) result(this) - integer(psb_ipk_) :: x(:) - type(psb_i_vect_oacc) :: this - integer(psb_ipk_) :: info - this%v = x - call this%asb(size(x), info) - end function constructor + subroutine i_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta + class(psb_i_vect_oacc) :: y + integer(psb_ipk_) :: info + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if - subroutine i_oacc_gthzv_x(i, n, idx, x, y) - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - integer(psb_ipk_) :: y(:) - class(psb_i_vect_oacc) :: x - integer :: info + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + if (y%is_host()) call y%sync_space() - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do - end subroutine i_oacc_gthzv_x + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do - subroutine i_oacc_gthzbuf(i, n, idx, x) - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - class(psb_i_vect_oacc) :: x - integer :: info + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine i_oacc_sctb_buf - if (.not.allocated(x%combuf)) then - call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') - return - end if + subroutine i_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_oacc) :: y + integer(psb_ipk_) :: info, ni - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do - end subroutine i_oacc_gthzbuf + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'i_oacc_sctb_x') + return + end select - subroutine i_oacc_sctb(n, idx, x, beta, y) - implicit none - integer(psb_ipk_) :: n, idx(:) - integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_oacc) :: y - integer(psb_ipk_) :: info - integer :: i + if (y%is_host()) call y%sync_space() - if (n == 0) return + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + call y%set_dev() + end subroutine i_oacc_sctb_x + + subroutine i_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine i_oacc_sctb + + subroutine i_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_i_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'i_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine i_oacc_gthzbuf + + subroutine i_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + integer(psb_ipk_) :: y(:) + class(psb_i_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'i_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine i_oacc_gthzv_x + + subroutine i_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_i_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_i_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space() + if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n - y%v(idx(i)) = beta * y%v(idx(i)) + x(i) - end do - end subroutine i_oacc_sctb - - subroutine i_oacc_sctb_x(i, n, idx, x, beta, y) - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_oacc) :: y - integer :: info - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - if (y%is_host()) call y%sync_space(info) - - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + x(i) - end do - - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - end select - end subroutine i_oacc_sctb_x - - subroutine i_oacc_sctb_buf(i, n, idx, beta, y) - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - integer(psb_ipk_) :: beta - class(psb_i_vect_oacc) :: y - integer(psb_ipk_) :: info - - if (.not.allocated(y%v)) then - call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') - return - end if - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%v(i) - end do - end subroutine i_oacc_sctb_buf - - subroutine i_oacc_set_host(x) - class(psb_i_vect_oacc), intent(inout) :: x - x%state = is_host - end subroutine i_oacc_set_host - - subroutine i_oacc_set_sync(x) - class(psb_i_vect_oacc), intent(inout) :: x - x%state = is_sync - end subroutine i_oacc_set_sync - - subroutine i_oacc_set_dev(x) - class(psb_i_vect_oacc), intent(inout) :: x - x%state = is_dev - end subroutine i_oacc_set_dev - - subroutine i_oacc_set_scal(x, val, first, last) - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: val - integer(psb_ipk_), optional :: first, last - - integer(psb_ipk_) :: first_, last_ - - first_ = 1 - last_ = size(x%v) - if (present(first)) first_ = max(1, first) - if (present(last)) last_ = min(size(x%v), last) - - !$acc parallel loop - do i = first_, last_ - x%v(i) = val + x%v(virl%v(i)) = vval%v(i) end do call x%set_dev() - end subroutine i_oacc_set_scal - - function i_oacc_is_host(x) result(res) - implicit none - class(psb_i_vect_oacc), intent(in) :: x - logical :: res - - res = (x%state == is_host) - end function i_oacc_is_host - - function i_oacc_is_dev(x) result(res) - implicit none - class(psb_i_vect_oacc), intent(in) :: x - logical :: res - - res = (x%state == is_dev) - end function i_oacc_is_dev - - function i_oacc_is_sync(x) result(res) - implicit none - class(psb_i_vect_oacc), intent(in) :: x - logical :: res - - res = (x%state == is_sync) - end function i_oacc_is_sync - - subroutine i_oacc_free(x, info) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - - info = 0 - if (allocated(x%v)) deallocate(x%v, stat=info) - if (info /= 0) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info, 'i_oacc_free') - end if - call x%set_sync() - end subroutine i_oacc_free - - subroutine i_oacc_ins_a(n, irl, val, dupl, x, info) - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n, dupl - integer(psb_ipk_), intent(in) :: irl(:) - integer(psb_ipk_), intent(in) :: val(:) - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i - - info = 0 - if (x%is_dev()) call x%sync() - call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info) + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space() + end select + select type(vval => val) + type is (psb_i_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine i_oacc_ins_v + + subroutine i_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine i_oacc_ins_a + + subroutine i_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine i_oacc_bld_mn + + + subroutine i_oacc_bld_x(x, this) + use psb_base_mod + implicit none + integer(psb_ipk_), intent(in) :: this(:) + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'i_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine i_oacc_bld_x + + subroutine i_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_i_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() call x%set_host() - end subroutine i_oacc_ins_a - - subroutine i_oacc_ins_v(n, irl, val, dupl, x, info) - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n, dupl - class(psb_i_base_vect_type), intent(inout) :: irl - class(psb_i_base_vect_type), intent(inout) :: val - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i, isz - logical :: done_oacc - - info = 0 - if (psb_errstatus_fatal()) return - - done_oacc = .false. - select type(virl => irl) - class is (psb_i_vect_oacc) - select type(vval => val) - class is (psb_i_vect_oacc) - if (vval%is_host()) call vval%sync() - if (virl%is_host()) call virl%sync() - if (x%is_host()) call x%sync() - ! Add the OpenACC kernel call here if needed - call x%set_dev() - done_oacc = .true. - end select - end select - - if (.not.done_oacc) then - if (irl%is_dev()) call irl%sync() - if (val%is_dev()) call val%sync() - call x%ins(n, irl%v, val%v, dupl, info) - end if - - if (info /= 0) then - call psb_errpush(info,'i_oacc_ins_v') - return - end if - end subroutine i_oacc_ins_v - - subroutine i_oacc_bld_x(x, this) - use psb_error_mod - implicit none - integer(psb_ipk_), intent(in) :: this(:) - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - call psb_realloc(size(this), x%v, info) - if (info /= 0) then - info = psb_err_alloc_request_ - call psb_errpush(info, 'i_oacc_bld_x', i_err = (/size(this), izero, izero, izero, izero/)) - end if - x%v(:) = this(:) + end if + else + if (size(x%v) < n) then + call x%psb_i_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() call x%set_host() - call x%sync() - end subroutine i_oacc_bld_x - - subroutine i_oacc_bld_mn(x, n) - use psb_error_mod - implicit none - integer(psb_mpk_), intent(in) :: n - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - call x%all(n, info) - if (info /= 0) then - call psb_errpush(info, 'i_oacc_bld_mn', i_err = (/n, n, n, n, n/)) - end if - end subroutine i_oacc_bld_mn - - subroutine i_oacc_sync(x) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: n, info - - info = 0 - if (x%is_host()) then - n = size(x%v) - if (.not.allocated(x%v)) then - write(*, *) 'Incoherent situation : x%v not allocated' - call psb_realloc(n, x%v, info) - end if - if ((n > size(x%v)) .or. (n > x%get_nrows())) then - write(*, *) 'Incoherent situation : sizes', n, size(x%v), x%get_nrows() - call psb_realloc(n, x%v, info) - end if - !$acc update device(x%v) - else if (x%is_dev()) then - n = size(x%v) - if (.not.allocated(x%v)) then - write(*, *) 'Incoherent situation : x%v not allocated' - call psb_realloc(n, x%v, info) - end if - !$acc update self(x%v) - end if - if (info == 0) call x%set_sync() - if (info /= 0) then - info = psb_err_internal_error_ - call psb_errpush(info, 'i_oacc_sync') - end if - end subroutine i_oacc_sync - - subroutine i_oacc_sync_space(x, info) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nh, nd - - info = 0 - if (x%is_dev()) then - nh = size(x%v) - nd = nh - if (nh < nd) then - call psb_realloc(nd, x%v, info) - end if - else - nh = size(x%v) - nd = nh - if (nh < nd) then - call psb_realloc(nd, x%v, info) - end if - end if - end subroutine i_oacc_sync_space - - function i_oacc_get_nrows(x) result(res) - implicit none - class(psb_i_vect_oacc), intent(in) :: x - integer(psb_ipk_) :: res - - res = 0 - if (allocated(x%v)) res = size(x%v) - end function i_oacc_get_nrows - - function i_oacc_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'iOACC' - end function i_oacc_get_fmt - - subroutine i_oacc_all(n, x, info) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(out) :: x - integer(psb_ipk_), intent(in) :: n - integer(psb_ipk_), intent(out) :: info - - call psb_realloc(n, x%v, info) - if (info == 0) call x%set_host() - if (info == 0) call x%sync_space(info) - if (info /= 0) then - info = psb_err_alloc_request_ - call psb_errpush(info, 'i_oacc_all', i_err=(/n, n, n, n, n/)) - end if - end subroutine i_oacc_all - - subroutine i_oacc_zero(x) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - ! Ensure zeroing on the GPU side - call x%set_dev() - x%v = 0 - !$acc update device(x%v) - end subroutine i_oacc_zero - - subroutine i_oacc_asb_m(n, x, info) - use psb_error_mod - use psb_realloc_mod - implicit none - integer(psb_ipk_), intent(in) :: n - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nh, nd - - info = 0 - if (x%is_dev()) then - nd = size(x%v) - if (nd < n) then - call x%sync() - call x%psb_i_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space(info) - call x%set_host() - end if - else - nh = size(x%v) - if (nh < n) then - call x%psb_i_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space(info) - call x%set_host() - end if - end if - end subroutine i_oacc_asb_m - - subroutine i_oacc_vect_finalize(x) - use psi_serial_mod - use psb_realloc_mod - implicit none - type(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - info = 0 - call x%free(info) - end subroutine i_oacc_vect_finalize + end if + end if + end subroutine i_oacc_asb_m -end module psb_i_oacc_vect_mod + subroutine i_oacc_set_scal(x, val, first, last) + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine i_oacc_set_scal + + subroutine i_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(izero) + end subroutine i_oacc_zero + + function i_oacc_get_nrows(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function i_oacc_get_nrows + + function i_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "iOACC" + + end function i_oacc_get_fmt + + ! subroutine i_oacc_set_vect(x,y) + ! implicit none + ! class(psb_i_vect_oacc), intent(inout) :: x + ! integer(psb_ipk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine i_oacc_set_vect - - - - - \ No newline at end of file + subroutine i_oacc_to_dev(v) + implicit none + integer(psb_ipk_) :: v(:) + !$acc update device(v) + end subroutine i_oacc_to_dev + + subroutine i_oacc_to_host(v) + implicit none + integer(psb_ipk_) :: v(:) + !$acc update self(v) + end subroutine i_oacc_to_host + + subroutine i_oacc_sync_space(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + if (allocated(x%v)) then + call i_oacc_create_dev(x%v) + end if + contains + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + end subroutine i_oacc_sync_space + + subroutine i_oacc_sync(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + call i_oacc_to_host(x%v) + end if + if (x%is_host()) then + call i_oacc_to_dev(x%v) + end if + call x%set_sync() + end subroutine i_oacc_sync + + subroutine i_oacc_set_host(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine i_oacc_set_host + + subroutine i_oacc_set_dev(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine i_oacc_set_dev + + subroutine i_oacc_set_sync(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine i_oacc_set_sync + + function i_oacc_is_dev(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_oacc_is_dev + + function i_oacc_is_host(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_oacc_is_host + + function i_oacc_is_sync(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_oacc_is_sync + + subroutine i_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'i_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine i_oacc_vect_all + + subroutine i_oacc_vect_free(x, info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine i_oacc_vect_free + + function i_oacc_get_size(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function i_oacc_get_size + +end module psb_i_oacc_vect_mod diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 new file mode 100644 index 00000000..aeba4537 --- /dev/null +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -0,0 +1,507 @@ +module psb_l_oacc_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_l_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_l_base_vect_type) :: psb_l_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => l_oacc_get_nrows + procedure, nopass :: get_fmt => l_oacc_get_fmt + + procedure, pass(x) :: all => l_oacc_vect_all + procedure, pass(x) :: zero => l_oacc_zero + procedure, pass(x) :: asb_m => l_oacc_asb_m + procedure, pass(x) :: sync => l_oacc_sync + procedure, pass(x) :: sync_space => l_oacc_sync_space + procedure, pass(x) :: bld_x => l_oacc_bld_x + procedure, pass(x) :: bld_mn => l_oacc_bld_mn + procedure, pass(x) :: free => l_oacc_vect_free + procedure, pass(x) :: ins_a => l_oacc_ins_a + procedure, pass(x) :: ins_v => l_oacc_ins_v + procedure, pass(x) :: is_host => l_oacc_is_host + procedure, pass(x) :: is_dev => l_oacc_is_dev + procedure, pass(x) :: is_sync => l_oacc_is_sync + procedure, pass(x) :: set_host => l_oacc_set_host + procedure, pass(x) :: set_dev => l_oacc_set_dev + procedure, pass(x) :: set_sync => l_oacc_set_sync + procedure, pass(x) :: set_scal => l_oacc_set_scal + + procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf + procedure, pass(y) :: sctb => l_oacc_sctb + procedure, pass(y) :: sctb_x => l_oacc_sctb_x + procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf + + procedure, pass(x) :: get_size => l_oacc_get_size + + + end type psb_l_vect_oacc + + +contains + + + subroutine l_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_lpk_) :: beta + class(psb_l_vect_oacc) :: y + integer(psb_ipk_) :: info + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do + + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine l_oacc_sctb_buf + + subroutine l_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_lpk_) :: beta, x(:) + class(psb_l_vect_oacc) :: y + integer(psb_ipk_) :: info, ni + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'l_oacc_sctb_x') + return + end select + + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + + call y%set_dev() + end subroutine l_oacc_sctb_x + + subroutine l_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_lpk_) :: beta, x(:) + class(psb_l_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine l_oacc_sctb + + subroutine l_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_l_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'l_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine l_oacc_gthzbuf + + subroutine l_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + integer(psb_lpk_) :: y(:) + class(psb_l_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'l_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine l_oacc_gthzv_x + + subroutine l_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_l_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_l_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space() + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space() + end select + select type(vval => val) + type is (psb_l_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine l_oacc_ins_v + + subroutine l_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_lpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine l_oacc_ins_a + + subroutine l_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine l_oacc_bld_mn + + + subroutine l_oacc_bld_x(x, this) + use psb_base_mod + implicit none + integer(psb_lpk_), intent(in) :: this(:) + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'l_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine l_oacc_bld_x + + subroutine l_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_l_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + else + if (size(x%v) < n) then + call x%psb_l_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + end if + end subroutine l_oacc_asb_m + + subroutine l_oacc_set_scal(x, val, first, last) + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_lpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine l_oacc_set_scal + + subroutine l_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(lzero) + end subroutine l_oacc_zero + + function l_oacc_get_nrows(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function l_oacc_get_nrows + + function l_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "lOACC" + + end function l_oacc_get_fmt + + ! subroutine l_oacc_set_vect(x,y) + ! implicit none + ! class(psb_l_vect_oacc), intent(inout) :: x + ! integer(psb_lpk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine l_oacc_set_vect + + subroutine l_oacc_to_dev(v) + implicit none + integer(psb_lpk_) :: v(:) + !$acc update device(v) + end subroutine l_oacc_to_dev + + subroutine l_oacc_to_host(v) + implicit none + integer(psb_lpk_) :: v(:) + !$acc update self(v) + end subroutine l_oacc_to_host + + subroutine l_oacc_sync_space(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + if (allocated(x%v)) then + call l_oacc_create_dev(x%v) + end if + contains + subroutine l_oacc_create_dev(v) + implicit none + integer(psb_lpk_) :: v(:) + !$acc enter data copyin(v) + end subroutine l_oacc_create_dev + end subroutine l_oacc_sync_space + + subroutine l_oacc_sync(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + call l_oacc_to_host(x%v) + end if + if (x%is_host()) then + call l_oacc_to_dev(x%v) + end if + call x%set_sync() + end subroutine l_oacc_sync + + subroutine l_oacc_set_host(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine l_oacc_set_host + + subroutine l_oacc_set_dev(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine l_oacc_set_dev + + subroutine l_oacc_set_sync(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine l_oacc_set_sync + + function l_oacc_is_dev(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function l_oacc_is_dev + + function l_oacc_is_host(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function l_oacc_is_host + + function l_oacc_is_sync(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function l_oacc_is_sync + + subroutine l_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_l_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'l_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine l_oacc_vect_all + + subroutine l_oacc_vect_free(x, info) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine l_oacc_vect_free + + function l_oacc_get_size(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function l_oacc_get_size + +end module psb_l_oacc_vect_mod diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90 index 2d8e8b40..7d3f9406 100644 --- a/openacc/psb_oacc_mod.F90 +++ b/openacc/psb_oacc_mod.F90 @@ -4,6 +4,7 @@ module psb_oacc_mod use psb_oacc_env_mod use psb_i_oacc_vect_mod + use psb_l_oacc_vect_mod use psb_s_oacc_vect_mod use psb_d_oacc_vect_mod use psb_c_oacc_vect_mod @@ -13,5 +14,13 @@ module psb_oacc_mod use psb_d_oacc_csr_mat_mod use psb_c_oacc_csr_mat_mod use psb_z_oacc_csr_mat_mod + use psb_s_oacc_ell_mat_mod + use psb_d_oacc_ell_mat_mod + use psb_c_oacc_ell_mat_mod + use psb_z_oacc_ell_mat_mod + use psb_s_oacc_hll_mat_mod + use psb_d_oacc_hll_mat_mod + use psb_c_oacc_hll_mat_mod + use psb_z_oacc_hll_mat_mod end module psb_oacc_mod diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90 new file mode 100644 index 00000000..541fdf9a --- /dev/null +++ b/openacc/psb_s_oacc_ell_mat_mod.F90 @@ -0,0 +1,341 @@ +module psb_s_oacc_ell_mat_mod + use iso_c_binding + use psb_s_mat_mod + use psb_s_ell_mat_mod + use psb_s_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_ell_sparse_mat) :: psb_s_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => s_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => s_oacc_ell_sizeof + procedure, pass(a) :: is_host => s_oacc_ell_is_host + procedure, pass(a) :: is_sync => s_oacc_ell_is_sync + procedure, pass(a) :: is_dev => s_oacc_ell_is_dev + procedure, pass(a) :: set_host => s_oacc_ell_set_host + procedure, pass(a) :: set_sync => s_oacc_ell_set_sync + procedure, pass(a) :: set_dev => s_oacc_ell_set_dev + procedure, pass(a) :: sync_space => s_oacc_ell_sync_space + procedure, pass(a) :: sync => s_oacc_ell_sync + procedure, pass(a) :: free => s_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_s_oacc_ell_csmv + procedure, pass(a) :: scals => psb_s_oacc_ell_scals + procedure, pass(a) :: scalv => psb_s_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_s_oacc_ell_mold + + end type psb_s_oacc_ell_sparse_mat + + interface + module subroutine psb_s_oacc_ell_mold(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_mold + end interface + + interface + module subroutine psb_s_oacc_ell_cp_from_fmt(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_s_oacc_ell_mv_from_coo(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_s_oacc_ell_mv_from_fmt(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_s_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_csmm + end interface + + interface + module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_csmv + end interface + + interface + module subroutine psb_s_oacc_ell_scals(d, a, info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_scals + end interface + + interface + module subroutine psb_s_oacc_ell_scal(d,a,info,side) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_s_oacc_ell_scal + end interface + + interface + module subroutine psb_s_oacc_ell_reallocate_nz(nz,a) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_s_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_s_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_s_oacc_ell_cp_from_coo(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_cp_from_coo + end interface + +contains + + subroutine s_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + + call a%psb_s_ell_sparse_mat%free() + + return + end subroutine s_oacc_ell_free + + + function s_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function s_oacc_ell_sizeof + + subroutine s_oacc_ell_sync_space(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call s_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + + contains + subroutine s_oacc_create_dev(v) + implicit none + real(psb_spk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine s_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + end subroutine s_oacc_ell_sync_space + + + + function s_oacc_ell_is_host(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_oacc_ell_is_host + + function s_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_oacc_ell_is_sync + + function s_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_oacc_ell_is_dev + + subroutine s_oacc_ell_set_host(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_oacc_ell_set_host + + subroutine s_oacc_ell_set_sync(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_oacc_ell_set_sync + + subroutine s_oacc_ell_set_dev(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_oacc_ell_set_dev + + function s_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL_oacc' + end function s_oacc_ell_get_fmt + + subroutine s_oacc_ell_sync(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_s_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call s_oacc_ell_to_host(a%val) + call i_oacc_ell_to_host(a%ja) + call i_oacc_ell_to_host_scalar(a%irn) + call i_oacc_ell_to_host_scalar(a%idiag) + else if (a%is_host()) then + call s_oacc_ell_to_dev(a%val) + call i_oacc_ell_to_dev(a%ja) + call i_oacc_ell_to_dev_scalar(a%irn) + call i_oacc_ell_to_dev_scalar(a%idiag) + end if + call tmpa%set_sync() + end subroutine s_oacc_ell_sync + + subroutine s_oacc_ell_to_host(v) + implicit none + real(psb_spk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine s_oacc_ell_to_host + + subroutine s_oacc_ell_to_host_scalar(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine s_oacc_ell_to_host_scalar + + subroutine i_oacc_ell_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev + + subroutine i_oacc_ell_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev_scalar + + subroutine i_oacc_ell_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host + + subroutine i_oacc_ell_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host_scalar +end module psb_s_oacc_ell_mat_mod diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..bf8949a1 --- /dev/null +++ b/openacc/psb_s_oacc_hll_mat_mod.F90 @@ -0,0 +1,352 @@ +module psb_s_oacc_hll_mat_mod + use iso_c_binding + use psb_s_mat_mod + use psb_s_hll_mat_mod + use psb_s_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_hll_sparse_mat) :: psb_s_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => s_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => s_oacc_hll_sizeof + procedure, pass(a) :: is_host => s_oacc_hll_is_host + procedure, pass(a) :: is_sync => s_oacc_hll_is_sync + procedure, pass(a) :: is_dev => s_oacc_hll_is_dev + procedure, pass(a) :: set_host => s_oacc_hll_set_host + procedure, pass(a) :: set_sync => s_oacc_hll_set_sync + procedure, pass(a) :: set_dev => s_oacc_hll_set_dev + procedure, pass(a) :: sync_space => s_oacc_hll_sync_space + procedure, pass(a) :: sync => s_oacc_hll_sync + procedure, pass(a) :: free => s_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_s_oacc_hll_csmv + procedure, pass(a) :: scals => psb_s_oacc_hll_scals + procedure, pass(a) :: scalv => psb_s_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_s_oacc_hll_mold + + end type psb_s_oacc_hll_sparse_mat + + interface + module subroutine psb_s_oacc_hll_mold(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_mold + end interface + + interface + module subroutine psb_s_oacc_hll_cp_from_fmt(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_s_oacc_hll_mv_from_coo(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_s_oacc_hll_mv_from_fmt(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_s_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_s_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_csmm + end interface + + interface + module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_csmv + end interface + + interface + module subroutine psb_s_oacc_hll_scals(d, a, info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_scals + end interface + + interface + module subroutine psb_s_oacc_hll_scal(d,a,info,side) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_s_oacc_hll_scal + end interface + + interface + module subroutine psb_s_oacc_hll_reallocate_nz(nz,a) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_s_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_s_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_s_oacc_hll_cp_from_coo(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_cp_from_coo + end interface + +contains + + subroutine s_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_s_hll_sparse_mat%free() + + return + end subroutine s_oacc_hll_free + + function s_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function s_oacc_hll_sizeof + + + + function s_oacc_hll_is_host(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_oacc_hll_is_host + + function s_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_oacc_hll_is_sync + + function s_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_oacc_hll_is_dev + + subroutine s_oacc_hll_set_host(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_oacc_hll_set_host + + subroutine s_oacc_hll_set_sync(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_oacc_hll_set_sync + + subroutine s_oacc_hll_set_dev(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_oacc_hll_set_dev + + function s_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function s_oacc_hll_get_fmt + + subroutine s_oacc_hll_sync_space(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call s_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine s_oacc_create_dev(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine s_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine s_oacc_hll_sync_space + + + subroutine s_oacc_hll_sync(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_s_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call s_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call s_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine s_oacc_hll_sync + + subroutine s_oacc_hll_to_host(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine s_oacc_hll_to_host + + subroutine s_oacc_hll_to_dev(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine s_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_s_oacc_hll_mat_mod diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index 36ae7da8..5c34827d 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -42,6 +42,7 @@ module psb_s_oacc_vect_mod procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf procedure, pass(x) :: get_size => s_oacc_get_size + procedure, pass(x) :: dot_v => s_oacc_vect_dot procedure, pass(x) :: dot_a => s_oacc_dot_a procedure, pass(y) :: axpby_v => s_oacc_axpby_v @@ -70,7 +71,6 @@ module psb_s_oacc_vect_mod end subroutine s_oacc_mlt_v end interface - interface subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import @@ -83,7 +83,7 @@ module psb_s_oacc_vect_mod character(len=1), intent(in), optional :: conjgx, conjgy end subroutine s_oacc_mlt_v_2 end interface - + contains subroutine s_oacc_absval1(x) @@ -432,7 +432,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() if (y%is_host()) call y%sync_space() !$acc parallel loop @@ -459,7 +459,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 's_oacc_sctb_x') return @@ -475,8 +475,6 @@ contains call y%set_dev() end subroutine s_oacc_sctb_x - - subroutine s_oacc_sctb(n, idx, x, beta, y) use psb_base_mod implicit none @@ -498,7 +496,6 @@ contains call y%set_host() end subroutine s_oacc_sctb - subroutine s_oacc_gthzbuf(i, n, idx, x) use psb_base_mod implicit none @@ -515,7 +512,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 's_oacc_gthzbuf') return @@ -542,7 +539,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 's_oacc_gthzv_x') return @@ -577,7 +574,7 @@ contains select type(vval => val) type is (psb_s_vect_oacc) if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) + if (virl%is_host()) call virl%sync_space() if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n @@ -591,7 +588,7 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) + if (virl%is_dev()) call virl%sync_space() end select select type(vval => val) type is (psb_s_vect_oacc) @@ -607,8 +604,6 @@ contains end subroutine s_oacc_ins_v - - subroutine s_oacc_ins_a(n, irl, val, dupl, x, info) use psi_serial_mod implicit none @@ -628,8 +623,6 @@ contains end subroutine s_oacc_ins_a - - subroutine s_oacc_bld_mn(x, n) use psb_base_mod implicit none @@ -668,7 +661,6 @@ contains end subroutine s_oacc_bld_x - subroutine s_oacc_asb_m(n, x, info) use psb_base_mod implicit none @@ -696,8 +688,6 @@ contains end if end subroutine s_oacc_asb_m - - subroutine s_oacc_set_scal(x, val, first, last) class(psb_s_vect_oacc), intent(inout) :: x real(psb_spk_), intent(in) :: val @@ -718,8 +708,6 @@ contains call x%set_dev() end subroutine s_oacc_set_scal - - subroutine s_oacc_zero(x) use psi_serial_mod implicit none @@ -743,6 +731,7 @@ contains end function s_oacc_get_fmt + function s_oacc_vect_dot(n, x, y) result(res) implicit none class(psb_s_vect_oacc), intent(inout) :: x @@ -776,9 +765,6 @@ contains end function s_oacc_vect_dot - - - function s_oacc_dot_a(n, x, y) result(res) implicit none class(psb_s_vect_oacc), intent(inout) :: x @@ -910,7 +896,6 @@ contains end if end subroutine s_oacc_vect_all - subroutine s_oacc_vect_free(x, info) implicit none class(psb_s_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 new file mode 100644 index 00000000..8bf8c9fa --- /dev/null +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -0,0 +1,341 @@ +module psb_z_oacc_ell_mat_mod + use iso_c_binding + use psb_z_mat_mod + use psb_z_ell_mat_mod + use psb_z_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_ell_sparse_mat) :: psb_z_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => z_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => z_oacc_ell_sizeof + procedure, pass(a) :: is_host => z_oacc_ell_is_host + procedure, pass(a) :: is_sync => z_oacc_ell_is_sync + procedure, pass(a) :: is_dev => z_oacc_ell_is_dev + procedure, pass(a) :: set_host => z_oacc_ell_set_host + procedure, pass(a) :: set_sync => z_oacc_ell_set_sync + procedure, pass(a) :: set_dev => z_oacc_ell_set_dev + procedure, pass(a) :: sync_space => z_oacc_ell_sync_space + procedure, pass(a) :: sync => z_oacc_ell_sync + procedure, pass(a) :: free => z_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_z_oacc_ell_csmv + procedure, pass(a) :: scals => psb_z_oacc_ell_scals + procedure, pass(a) :: scalv => psb_z_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_z_oacc_ell_mold + + end type psb_z_oacc_ell_sparse_mat + + interface + module subroutine psb_z_oacc_ell_mold(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_mold + end interface + + interface + module subroutine psb_z_oacc_ell_cp_from_fmt(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_z_oacc_ell_mv_from_coo(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_z_oacc_ell_mv_from_fmt(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_z_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_z_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_csmm + end interface + + interface + module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_csmv + end interface + + interface + module subroutine psb_z_oacc_ell_scals(d, a, info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_scals + end interface + + interface + module subroutine psb_z_oacc_ell_scal(d,a,info,side) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_z_oacc_ell_scal + end interface + + interface + module subroutine psb_z_oacc_ell_reallocate_nz(nz,a) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_z_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_z_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_z_oacc_ell_cp_from_coo(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_cp_from_coo + end interface + +contains + + subroutine z_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + + call a%psb_z_ell_sparse_mat%free() + + return + end subroutine z_oacc_ell_free + + + function z_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function z_oacc_ell_sizeof + + subroutine z_oacc_ell_sync_space(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call z_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + + contains + subroutine z_oacc_create_dev(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine z_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + end subroutine z_oacc_ell_sync_space + + + + function z_oacc_ell_is_host(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_oacc_ell_is_host + + function z_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_oacc_ell_is_sync + + function z_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_oacc_ell_is_dev + + subroutine z_oacc_ell_set_host(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_oacc_ell_set_host + + subroutine z_oacc_ell_set_sync(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_oacc_ell_set_sync + + subroutine z_oacc_ell_set_dev(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_oacc_ell_set_dev + + function z_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL_oacc' + end function z_oacc_ell_get_fmt + + subroutine z_oacc_ell_sync(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_z_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call z_oacc_ell_to_host(a%val) + call i_oacc_ell_to_host(a%ja) + call i_oacc_ell_to_host_scalar(a%irn) + call i_oacc_ell_to_host_scalar(a%idiag) + else if (a%is_host()) then + call z_oacc_ell_to_dev(a%val) + call i_oacc_ell_to_dev(a%ja) + call i_oacc_ell_to_dev_scalar(a%irn) + call i_oacc_ell_to_dev_scalar(a%idiag) + end if + call tmpa%set_sync() + end subroutine z_oacc_ell_sync + + subroutine z_oacc_ell_to_host(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine z_oacc_ell_to_host + + subroutine z_oacc_ell_to_host_scalar(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine z_oacc_ell_to_host_scalar + + subroutine i_oacc_ell_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev + + subroutine i_oacc_ell_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev_scalar + + subroutine i_oacc_ell_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host + + subroutine i_oacc_ell_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host_scalar +end module psb_z_oacc_ell_mat_mod diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..e6a4929a --- /dev/null +++ b/openacc/psb_z_oacc_hll_mat_mod.F90 @@ -0,0 +1,352 @@ +module psb_z_oacc_hll_mat_mod + use iso_c_binding + use psb_z_mat_mod + use psb_z_hll_mat_mod + use psb_z_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_hll_sparse_mat) :: psb_z_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => z_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => z_oacc_hll_sizeof + procedure, pass(a) :: is_host => z_oacc_hll_is_host + procedure, pass(a) :: is_sync => z_oacc_hll_is_sync + procedure, pass(a) :: is_dev => z_oacc_hll_is_dev + procedure, pass(a) :: set_host => z_oacc_hll_set_host + procedure, pass(a) :: set_sync => z_oacc_hll_set_sync + procedure, pass(a) :: set_dev => z_oacc_hll_set_dev + procedure, pass(a) :: sync_space => z_oacc_hll_sync_space + procedure, pass(a) :: sync => z_oacc_hll_sync + procedure, pass(a) :: free => z_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_z_oacc_hll_csmv + procedure, pass(a) :: scals => psb_z_oacc_hll_scals + procedure, pass(a) :: scalv => psb_z_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_z_oacc_hll_mold + + end type psb_z_oacc_hll_sparse_mat + + interface + module subroutine psb_z_oacc_hll_mold(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_mold + end interface + + interface + module subroutine psb_z_oacc_hll_cp_from_fmt(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_z_oacc_hll_mv_from_coo(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_z_oacc_hll_mv_from_fmt(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_z_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_z_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_csmm + end interface + + interface + module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_csmv + end interface + + interface + module subroutine psb_z_oacc_hll_scals(d, a, info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_scals + end interface + + interface + module subroutine psb_z_oacc_hll_scal(d,a,info,side) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_z_oacc_hll_scal + end interface + + interface + module subroutine psb_z_oacc_hll_reallocate_nz(nz,a) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_z_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_z_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_z_oacc_hll_cp_from_coo(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_cp_from_coo + end interface + +contains + + subroutine z_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_z_hll_sparse_mat%free() + + return + end subroutine z_oacc_hll_free + + function z_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function z_oacc_hll_sizeof + + + + function z_oacc_hll_is_host(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_oacc_hll_is_host + + function z_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_oacc_hll_is_sync + + function z_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_oacc_hll_is_dev + + subroutine z_oacc_hll_set_host(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_oacc_hll_set_host + + subroutine z_oacc_hll_set_sync(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_oacc_hll_set_sync + + subroutine z_oacc_hll_set_dev(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_oacc_hll_set_dev + + function z_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function z_oacc_hll_get_fmt + + subroutine z_oacc_hll_sync_space(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call z_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine z_oacc_create_dev(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine z_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine z_oacc_hll_sync_space + + + subroutine z_oacc_hll_sync(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_z_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call z_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call z_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine z_oacc_hll_sync + + subroutine z_oacc_hll_to_host(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine z_oacc_hll_to_host + + subroutine z_oacc_hll_to_dev(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine z_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_z_oacc_hll_mat_mod diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index 5d03b49d..0bac854a 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -42,6 +42,7 @@ module psb_z_oacc_vect_mod procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf procedure, pass(x) :: get_size => z_oacc_get_size + procedure, pass(x) :: dot_v => z_oacc_vect_dot procedure, pass(x) :: dot_a => z_oacc_dot_a procedure, pass(y) :: axpby_v => z_oacc_axpby_v @@ -70,7 +71,6 @@ module psb_z_oacc_vect_mod end subroutine z_oacc_mlt_v end interface - interface subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import @@ -83,7 +83,7 @@ module psb_z_oacc_vect_mod character(len=1), intent(in), optional :: conjgx, conjgy end subroutine z_oacc_mlt_v_2 end interface - + contains subroutine z_oacc_absval1(x) @@ -432,7 +432,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() if (y%is_host()) call y%sync_space() !$acc parallel loop @@ -459,7 +459,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'z_oacc_sctb_x') return @@ -475,8 +475,6 @@ contains call y%set_dev() end subroutine z_oacc_sctb_x - - subroutine z_oacc_sctb(n, idx, x, beta, y) use psb_base_mod implicit none @@ -498,7 +496,6 @@ contains call y%set_host() end subroutine z_oacc_sctb - subroutine z_oacc_gthzbuf(i, n, idx, x) use psb_base_mod implicit none @@ -515,7 +512,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'z_oacc_gthzbuf') return @@ -542,7 +539,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'z_oacc_gthzv_x') return @@ -577,7 +574,7 @@ contains select type(vval => val) type is (psb_z_vect_oacc) if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) + if (virl%is_host()) call virl%sync_space() if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n @@ -591,7 +588,7 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) + if (virl%is_dev()) call virl%sync_space() end select select type(vval => val) type is (psb_z_vect_oacc) @@ -607,8 +604,6 @@ contains end subroutine z_oacc_ins_v - - subroutine z_oacc_ins_a(n, irl, val, dupl, x, info) use psi_serial_mod implicit none @@ -628,8 +623,6 @@ contains end subroutine z_oacc_ins_a - - subroutine z_oacc_bld_mn(x, n) use psb_base_mod implicit none @@ -668,7 +661,6 @@ contains end subroutine z_oacc_bld_x - subroutine z_oacc_asb_m(n, x, info) use psb_base_mod implicit none @@ -696,8 +688,6 @@ contains end if end subroutine z_oacc_asb_m - - subroutine z_oacc_set_scal(x, val, first, last) class(psb_z_vect_oacc), intent(inout) :: x complex(psb_dpk_), intent(in) :: val @@ -718,8 +708,6 @@ contains call x%set_dev() end subroutine z_oacc_set_scal - - subroutine z_oacc_zero(x) use psi_serial_mod implicit none @@ -743,6 +731,7 @@ contains end function z_oacc_get_fmt + function z_oacc_vect_dot(n, x, y) result(res) implicit none class(psb_z_vect_oacc), intent(inout) :: x @@ -776,9 +765,6 @@ contains end function z_oacc_vect_dot - - - function z_oacc_dot_a(n, x, y) result(res) implicit none class(psb_z_vect_oacc), intent(inout) :: x @@ -910,7 +896,6 @@ contains end if end subroutine z_oacc_vect_all - subroutine z_oacc_vect_free(x, info) implicit none class(psb_z_vect_oacc), intent(inout) :: x