Implementation in OpenACC for ELL and HLL into templates. Merge from development

oacc_loloum
sfilippone 5 months ago
parent ff8513b4c6
commit 2982aaee27

@ -99,8 +99,8 @@ module psi_c_serial_mod
end subroutine psi_caxpbyv2
end interface psb_geaxpby
interface psi_upd_xyz
subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_abgdxyz
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_c_serial_mod
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_c_upd_xyz
end interface psi_upd_xyz
end subroutine psi_cabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_d_serial_mod
end subroutine psi_daxpbyv2
end interface psb_geaxpby
interface psi_upd_xyz
subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_abgdxyz
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_d_serial_mod
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_d_upd_xyz
end interface psi_upd_xyz
end subroutine psi_dabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_e_serial_mod
end subroutine psi_eaxpbyv2
end interface psb_geaxpby
interface psi_upd_xyz
subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_abgdxyz
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_e_serial_mod
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_e_upd_xyz
end interface psi_upd_xyz
end subroutine psi_eabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_i2_serial_mod
end subroutine psi_i2axpbyv2
end interface psb_geaxpby
interface psi_upd_xyz
subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_abgdxyz
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_i2_serial_mod
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_i2_upd_xyz
end interface psi_upd_xyz
end subroutine psi_i2abgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_m_serial_mod
end subroutine psi_maxpbyv2
end interface psb_geaxpby
interface psi_upd_xyz
subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_abgdxyz
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_m_serial_mod
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_m_upd_xyz
end interface psi_upd_xyz
end subroutine psi_mabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_s_serial_mod
end subroutine psi_saxpbyv2
end interface psb_geaxpby
interface psi_upd_xyz
subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_abgdxyz
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_s_serial_mod
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_s_upd_xyz
end interface psi_upd_xyz
end subroutine psi_sabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_z_serial_mod
end subroutine psi_zaxpbyv2
end interface psb_geaxpby
interface psi_upd_xyz
subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_abgdxyz
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_z_serial_mod
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_z_upd_xyz
end interface psi_upd_xyz
end subroutine psi_zabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -143,8 +143,8 @@ module psb_c_psblas_mod
end subroutine psb_caxpby
end interface
interface psb_upd_xyz
subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
interface psb_abgdxyz
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_c_vect_type, psb_cspmat_type
@ -154,8 +154,8 @@ module psb_c_psblas_mod
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_upd_xyz_vect
end interface psb_upd_xyz
end subroutine psb_cabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_camax(x, desc_a, info, jx,global)

@ -143,8 +143,8 @@ module psb_d_psblas_mod
end subroutine psb_daxpby
end interface
interface psb_upd_xyz
subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
interface psb_abgdxyz
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_d_vect_type, psb_dspmat_type
@ -154,8 +154,8 @@ module psb_d_psblas_mod
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_upd_xyz_vect
end interface psb_upd_xyz
end subroutine psb_dabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_damax(x, desc_a, info, jx,global)

@ -143,8 +143,8 @@ module psb_s_psblas_mod
end subroutine psb_saxpby
end interface
interface psb_upd_xyz
subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
interface psb_abgdxyz
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_s_vect_type, psb_sspmat_type
@ -154,8 +154,8 @@ module psb_s_psblas_mod
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_upd_xyz_vect
end interface psb_upd_xyz
end subroutine psb_sabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_samax(x, desc_a, info, jx,global)

@ -143,8 +143,8 @@ module psb_z_psblas_mod
end subroutine psb_zaxpby
end interface
interface psb_upd_xyz
subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
interface psb_abgdxyz
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_z_vect_type, psb_zspmat_type
@ -154,8 +154,8 @@ module psb_z_psblas_mod
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_upd_xyz_vect
end interface psb_upd_xyz
end subroutine psb_zabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_zamax(x, desc_a, info, jx,global)

@ -155,7 +155,7 @@ module psb_c_base_vect_mod
procedure, pass(z) :: axpby_v2 => c_base_axpby_v2
procedure, pass(z) :: axpby_a2 => c_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => c_base_upd_xyz
procedure, pass(z) :: abgdxyz => c_base_abgdxyz
procedure, pass(w) :: xyzw => c_base_xyzw
!
@ -1130,12 +1130,12 @@ contains
end subroutine c_base_axpby_a2
!
! UPD_XYZ is invoked via Z, hence the structure below.
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_upd_xyz
!> Function base_abgdxyz
!! \memberof psb_c_base_vect_type
!! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@ -1146,7 +1146,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -1159,11 +1159,11 @@ contains
if (x%is_dev().and.(alpha/=czero)) call x%sync()
if (y%is_dev().and.(beta/=czero)) call y%sync()
if (z%is_dev().and.(delta/=czero)) call z%sync()
call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine c_base_upd_xyz
end subroutine c_base_abgdxyz
subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod

@ -102,7 +102,7 @@ module psb_c_vect_mod
procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => c_vect_upd_xyz
procedure, pass(z) :: abgdxyz => c_vect_abgdxyz
procedure, pass(z) :: xyzw => c_vect_xyzw
procedure, pass(y) :: mlt_v => c_vect_mlt_v
@ -774,7 +774,7 @@ contains
end subroutine c_vect_axpby_a2
subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -785,9 +785,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine c_vect_upd_xyz
end subroutine c_vect_abgdxyz
subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod

@ -155,7 +155,7 @@ module psb_d_base_vect_mod
procedure, pass(z) :: axpby_v2 => d_base_axpby_v2
procedure, pass(z) :: axpby_a2 => d_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => d_base_upd_xyz
procedure, pass(z) :: abgdxyz => d_base_abgdxyz
procedure, pass(w) :: xyzw => d_base_xyzw
!
@ -1137,12 +1137,12 @@ contains
end subroutine d_base_axpby_a2
!
! UPD_XYZ is invoked via Z, hence the structure below.
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_upd_xyz
!> Function base_abgdxyz
!! \memberof psb_d_base_vect_type
!! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@ -1153,7 +1153,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -1166,11 +1166,11 @@ contains
if (x%is_dev().and.(alpha/=dzero)) call x%sync()
if (y%is_dev().and.(beta/=dzero)) call y%sync()
if (z%is_dev().and.(delta/=dzero)) call z%sync()
call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine d_base_upd_xyz
end subroutine d_base_abgdxyz
subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod

@ -102,7 +102,7 @@ module psb_d_vect_mod
procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => d_vect_upd_xyz
procedure, pass(z) :: abgdxyz => d_vect_abgdxyz
procedure, pass(z) :: xyzw => d_vect_xyzw
procedure, pass(y) :: mlt_v => d_vect_mlt_v
@ -781,7 +781,7 @@ contains
end subroutine d_vect_axpby_a2
subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -792,9 +792,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine d_vect_upd_xyz
end subroutine d_vect_abgdxyz
subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod

@ -155,7 +155,7 @@ module psb_s_base_vect_mod
procedure, pass(z) :: axpby_v2 => s_base_axpby_v2
procedure, pass(z) :: axpby_a2 => s_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => s_base_upd_xyz
procedure, pass(z) :: abgdxyz => s_base_abgdxyz
procedure, pass(w) :: xyzw => s_base_xyzw
!
@ -1137,12 +1137,12 @@ contains
end subroutine s_base_axpby_a2
!
! UPD_XYZ is invoked via Z, hence the structure below.
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_upd_xyz
!> Function base_abgdxyz
!! \memberof psb_s_base_vect_type
!! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@ -1153,7 +1153,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -1166,11 +1166,11 @@ contains
if (x%is_dev().and.(alpha/=szero)) call x%sync()
if (y%is_dev().and.(beta/=szero)) call y%sync()
if (z%is_dev().and.(delta/=szero)) call z%sync()
call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine s_base_upd_xyz
end subroutine s_base_abgdxyz
subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod

@ -102,7 +102,7 @@ module psb_s_vect_mod
procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => s_vect_upd_xyz
procedure, pass(z) :: abgdxyz => s_vect_abgdxyz
procedure, pass(z) :: xyzw => s_vect_xyzw
procedure, pass(y) :: mlt_v => s_vect_mlt_v
@ -781,7 +781,7 @@ contains
end subroutine s_vect_axpby_a2
subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -792,9 +792,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine s_vect_upd_xyz
end subroutine s_vect_abgdxyz
subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod

@ -155,7 +155,7 @@ module psb_z_base_vect_mod
procedure, pass(z) :: axpby_v2 => z_base_axpby_v2
procedure, pass(z) :: axpby_a2 => z_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => z_base_upd_xyz
procedure, pass(z) :: abgdxyz => z_base_abgdxyz
procedure, pass(w) :: xyzw => z_base_xyzw
!
@ -1130,12 +1130,12 @@ contains
end subroutine z_base_axpby_a2
!
! UPD_XYZ is invoked via Z, hence the structure below.
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_upd_xyz
!> Function base_abgdxyz
!! \memberof psb_z_base_vect_type
!! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@ -1146,7 +1146,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -1159,11 +1159,11 @@ contains
if (x%is_dev().and.(alpha/=zzero)) call x%sync()
if (y%is_dev().and.(beta/=zzero)) call y%sync()
if (z%is_dev().and.(delta/=zzero)) call z%sync()
call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine z_base_upd_xyz
end subroutine z_base_abgdxyz
subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod

@ -102,7 +102,7 @@ module psb_z_vect_mod
procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => z_vect_upd_xyz
procedure, pass(z) :: abgdxyz => z_vect_abgdxyz
procedure, pass(z) :: xyzw => z_vect_xyzw
procedure, pass(y) :: mlt_v => z_vect_mlt_v
@ -774,7 +774,7 @@ contains
end subroutine z_vect_axpby_a2
subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -785,9 +785,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine z_vect_upd_xyz
end subroutine z_vect_abgdxyz
subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod

@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info)
end subroutine psb_caddconst_vect
subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect
use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect
implicit none
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
@ -812,7 +812,7 @@ subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@ -822,5 +822,5 @@ subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
end subroutine psb_c_upd_xyz_vect
end subroutine psb_cabgdxyz_vect

@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info)
end subroutine psb_daddconst_vect
subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect
use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect
implicit none
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
@ -812,7 +812,7 @@ subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@ -822,5 +822,5 @@ subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
end subroutine psb_d_upd_xyz_vect
end subroutine psb_dabgdxyz_vect

@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info)
end subroutine psb_saddconst_vect
subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect
use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect
implicit none
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
@ -812,7 +812,7 @@ subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@ -822,5 +822,5 @@ subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
end subroutine psb_s_upd_xyz_vect
end subroutine psb_sabgdxyz_vect

@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info)
end subroutine psb_zaddconst_vect
subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect
use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect
implicit none
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
@ -812,7 +812,7 @@ subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@ -822,5 +822,5 @@ subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
end subroutine psb_z_upd_xyz_vect
end subroutine psb_zabgdxyz_vect

@ -1568,7 +1568,7 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine caxpbyv2
subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='c_upd_xyz'
name='cabgdxyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_c_upd_xyz
end subroutine psi_cabgdxyz
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='c_xyzw'
name='cabgdxyz'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine daxpbyv2
subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='d_upd_xyz'
name='dabgdxyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_d_upd_xyz
end subroutine psi_dabgdxyz
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='d_xyzw'
name='dabgdxyz'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine eaxpbyv2
subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='e_upd_xyz'
name='eabgdxyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_e_upd_xyz
end subroutine psi_eabgdxyz
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='e_xyzw'
name='eabgdxyz'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine i2axpbyv2
subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='i2_upd_xyz'
name='i2abgdxyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_i2_upd_xyz
end subroutine psi_i2abgdxyz
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='i2_xyzw'
name='i2abgdxyz'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine maxpbyv2
subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='m_upd_xyz'
name='mabgdxyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_m_upd_xyz
end subroutine psi_mabgdxyz
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='m_xyzw'
name='mabgdxyz'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine saxpbyv2
subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='s_upd_xyz'
name='sabgdxyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_s_upd_xyz
end subroutine psi_sabgdxyz
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='s_xyzw'
name='sabgdxyz'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine zaxpbyv2
subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='z_upd_xyz'
name='zabgdxyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_z_upd_xyz
end subroutine psi_zabgdxyz
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='z_xyzw'
name='zabgdxyz'
info = psb_success_
if (m.lt.0) then

@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod
procedure, pass(x) :: dot_a => c_cuda_dot_a
procedure, pass(y) :: axpby_v => c_cuda_axpby_v
procedure, pass(y) :: axpby_a => c_cuda_axpby_a
procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz
procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz
procedure, pass(y) :: mlt_v => c_cuda_mlt_v
procedure, pass(y) :: mlt_a => c_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2
@ -912,7 +912,7 @@ contains
end subroutine c_cuda_axpby_v
subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -946,7 +946,7 @@ contains
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
@ -972,7 +972,7 @@ contains
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine c_cuda_upd_xyz
end subroutine c_cuda_abgdxyz
subroutine c_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod

@ -313,16 +313,16 @@ module psb_c_vectordev_mod
end function axpbyMultiVecDeviceFloatComplex
end interface
interface upd_xyzMultiVecDevice
function upd_xyzMultiVecDeviceFloatComplex(n,alpha,beta,gamma,delta,deviceVecX,&
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceFloatComplex(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='upd_xyzMultiVecDeviceFloatComplex')
& result(res) bind(c,name='abgdxyzMultiVecDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
complex(c_float_complex), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function upd_xyzMultiVecDeviceFloatComplex
end function abgdxyzMultiVecDeviceFloatComplex
end interface
interface xyzwMultiVecDevice

@ -90,7 +90,7 @@ module psb_d_cuda_vect_mod
procedure, pass(x) :: dot_a => d_cuda_dot_a
procedure, pass(y) :: axpby_v => d_cuda_axpby_v
procedure, pass(y) :: axpby_a => d_cuda_axpby_a
procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz
procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz
procedure, pass(y) :: mlt_v => d_cuda_mlt_v
procedure, pass(y) :: mlt_a => d_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2
@ -912,7 +912,7 @@ contains
end subroutine d_cuda_axpby_v
subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -946,7 +946,7 @@ contains
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
@ -972,7 +972,7 @@ contains
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine d_cuda_upd_xyz
end subroutine d_cuda_abgdxyz
subroutine d_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod

@ -313,16 +313,16 @@ module psb_d_vectordev_mod
end function axpbyMultiVecDeviceDouble
end interface
interface upd_xyzMultiVecDevice
function upd_xyzMultiVecDeviceDouble(n,alpha,beta,gamma,delta,deviceVecX,&
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceDouble(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='upd_xyzMultiVecDeviceDouble')
& result(res) bind(c,name='abgdxyzMultiVecDeviceDouble')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
real(c_double), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function upd_xyzMultiVecDeviceDouble
end function abgdxyzMultiVecDeviceDouble
end interface
interface xyzwMultiVecDevice

@ -90,7 +90,7 @@ module psb_s_cuda_vect_mod
procedure, pass(x) :: dot_a => s_cuda_dot_a
procedure, pass(y) :: axpby_v => s_cuda_axpby_v
procedure, pass(y) :: axpby_a => s_cuda_axpby_a
procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz
procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz
procedure, pass(y) :: mlt_v => s_cuda_mlt_v
procedure, pass(y) :: mlt_a => s_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2
@ -912,7 +912,7 @@ contains
end subroutine s_cuda_axpby_v
subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -946,7 +946,7 @@ contains
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
@ -972,7 +972,7 @@ contains
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine s_cuda_upd_xyz
end subroutine s_cuda_abgdxyz
subroutine s_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod

@ -313,16 +313,16 @@ module psb_s_vectordev_mod
end function axpbyMultiVecDeviceFloat
end interface
interface upd_xyzMultiVecDevice
function upd_xyzMultiVecDeviceFloat(n,alpha,beta,gamma,delta,deviceVecX,&
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceFloat(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='upd_xyzMultiVecDeviceFloat')
& result(res) bind(c,name='abgdxyzMultiVecDeviceFloat')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
real(c_float), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function upd_xyzMultiVecDeviceFloat
end function abgdxyzMultiVecDeviceFloat
end interface
interface xyzwMultiVecDevice

@ -90,7 +90,7 @@ module psb_z_cuda_vect_mod
procedure, pass(x) :: dot_a => z_cuda_dot_a
procedure, pass(y) :: axpby_v => z_cuda_axpby_v
procedure, pass(y) :: axpby_a => z_cuda_axpby_a
procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz
procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz
procedure, pass(y) :: mlt_v => z_cuda_mlt_v
procedure, pass(y) :: mlt_a => z_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2
@ -912,7 +912,7 @@ contains
end subroutine z_cuda_axpby_v
subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -946,7 +946,7 @@ contains
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
@ -972,7 +972,7 @@ contains
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine z_cuda_upd_xyz
end subroutine z_cuda_abgdxyz
subroutine z_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod

@ -313,16 +313,16 @@ module psb_z_vectordev_mod
end function axpbyMultiVecDeviceDoubleComplex
end interface
interface upd_xyzMultiVecDevice
function upd_xyzMultiVecDeviceDoubleComplex(n,alpha,beta,gamma,delta,deviceVecX,&
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceDoubleComplex(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='upd_xyzMultiVecDeviceDoubleComplex')
& result(res) bind(c,name='abgdxyzMultiVecDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
complex(c_double_complex), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function upd_xyzMultiVecDeviceDoubleComplex
end function abgdxyzMultiVecDeviceDoubleComplex
end interface
interface xyzwMultiVecDevice

@ -17,12 +17,15 @@ CINCLUDES=
#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
FOBJS= psb_i_oacc_vect_mod.o \
FOBJS= psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o\
psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o \
psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \
psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o \
psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
psb_oacc_mod.o psb_oacc_env_mod.o
@ -44,24 +47,26 @@ iobjs: $(OBJS)
ilib: $(OBJS)
$(MAKE) -C impl lib
psb_oacc_mod.o : psb_i_oacc_vect_mod.o \
psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o \
psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \
psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o \
psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
psb_oacc_env_mod.o
psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o : psb_i_oacc_vect_mod.o
psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o
psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o
psb_d_oacc_csr_mat_mod.o psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o: psb_d_oacc_vect_mod.o
psb_c_oacc_csr_mat_mod.o psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o: psb_c_oacc_vect_mod.o
psb_z_oacc_csr_mat_mod.o psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o: psb_z_oacc_vect_mod.o
psb_s_oacc_csr_mat_mod.o: psb_s_oacc_vect_mod.o
psb_d_oacc_csr_mat_mod.o: psb_d_oacc_vect_mod.o
psb_c_oacc_csr_mat_mod.o: psb_c_oacc_vect_mod.o
psb_z_oacc_csr_mat_mod.o: psb_z_oacc_vect_mod.o
psb_d_oacc_ell_mat_mod.o: psb_d_oacc_vect_mod.o
psb_d_oacc_hll_mat_mod.o: psb_d_oacc_vect_mod.o
clean: cclean iclean

@ -71,6 +71,32 @@ psb_z_oacc_csr_mv_from_fmt.o \
psb_z_oacc_csr_mold.o \
psb_z_oacc_mlt_v_2.o \
psb_z_oacc_mlt_v.o \
psb_s_oacc_ell_vect_mv.o \
psb_s_oacc_ell_inner_vect_sv.o \
psb_s_oacc_ell_csmm.o \
psb_s_oacc_ell_csmv.o \
psb_s_oacc_ell_scals.o \
psb_s_oacc_ell_scal.o \
psb_s_oacc_ell_reallocate_nz.o \
psb_s_oacc_ell_allocate_mnnz.o \
psb_s_oacc_ell_cp_from_coo.o \
psb_s_oacc_ell_cp_from_fmt.o \
psb_s_oacc_ell_mv_from_coo.o \
psb_s_oacc_ell_mv_from_fmt.o \
psb_s_oacc_ell_mold.o \
psb_s_oacc_hll_mold.o \
psb_s_oacc_hll_mv_from_fmt.o \
psb_s_oacc_hll_mv_from_coo.o \
psb_s_oacc_hll_cp_from_fmt.o \
psb_s_oacc_hll_cp_from_coo.o \
psb_s_oacc_hll_allocate_mnnz.o \
psb_s_oacc_hll_reallocate_nz.o \
psb_s_oacc_hll_scal.o \
psb_s_oacc_hll_scals.o \
psb_s_oacc_hll_csmv.o \
psb_s_oacc_hll_csmm.o \
psb_s_oacc_hll_inner_vect_sv.o \
psb_s_oacc_hll_vect_mv.o \
psb_d_oacc_ell_vect_mv.o \
psb_d_oacc_ell_inner_vect_sv.o \
psb_d_oacc_ell_csmm.o \
@ -97,6 +123,58 @@ psb_d_oacc_hll_csmv.o \
psb_d_oacc_hll_csmm.o \
psb_d_oacc_hll_inner_vect_sv.o \
psb_d_oacc_hll_vect_mv.o \
psb_c_oacc_ell_vect_mv.o \
psb_c_oacc_ell_inner_vect_sv.o \
psb_c_oacc_ell_csmm.o \
psb_c_oacc_ell_csmv.o \
psb_c_oacc_ell_scals.o \
psb_c_oacc_ell_scal.o \
psb_c_oacc_ell_reallocate_nz.o \
psb_c_oacc_ell_allocate_mnnz.o \
psb_c_oacc_ell_cp_from_coo.o \
psb_c_oacc_ell_cp_from_fmt.o \
psb_c_oacc_ell_mv_from_coo.o \
psb_c_oacc_ell_mv_from_fmt.o \
psb_c_oacc_ell_mold.o \
psb_c_oacc_hll_mold.o \
psb_c_oacc_hll_mv_from_fmt.o \
psb_c_oacc_hll_mv_from_coo.o \
psb_c_oacc_hll_cp_from_fmt.o \
psb_c_oacc_hll_cp_from_coo.o \
psb_c_oacc_hll_allocate_mnnz.o \
psb_c_oacc_hll_reallocate_nz.o \
psb_c_oacc_hll_scal.o \
psb_c_oacc_hll_scals.o \
psb_c_oacc_hll_csmv.o \
psb_c_oacc_hll_csmm.o \
psb_c_oacc_hll_inner_vect_sv.o \
psb_c_oacc_hll_vect_mv.o \
psb_z_oacc_ell_vect_mv.o \
psb_z_oacc_ell_inner_vect_sv.o \
psb_z_oacc_ell_csmm.o \
psb_z_oacc_ell_csmv.o \
psb_z_oacc_ell_scals.o \
psb_z_oacc_ell_scal.o \
psb_z_oacc_ell_reallocate_nz.o \
psb_z_oacc_ell_allocate_mnnz.o \
psb_z_oacc_ell_cp_from_coo.o \
psb_z_oacc_ell_cp_from_fmt.o \
psb_z_oacc_ell_mv_from_coo.o \
psb_z_oacc_ell_mv_from_fmt.o \
psb_z_oacc_ell_mold.o \
psb_z_oacc_hll_mold.o \
psb_z_oacc_hll_mv_from_fmt.o \
psb_z_oacc_hll_mv_from_coo.o \
psb_z_oacc_hll_cp_from_fmt.o \
psb_z_oacc_hll_cp_from_coo.o \
psb_z_oacc_hll_allocate_mnnz.o \
psb_z_oacc_hll_reallocate_nz.o \
psb_z_oacc_hll_scal.o \
psb_z_oacc_hll_scals.o \
psb_z_oacc_hll_csmv.o \
psb_z_oacc_hll_csmm.o \
psb_z_oacc_hll_inner_vect_sv.o \
psb_z_oacc_hll_vect_mv.o
objs: $(OBJS)
@ -104,18 +182,18 @@ lib: objs
ar cur ../$(LIBNAME) $(OBJS)
psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
#psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \
psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \
psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \
psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \
psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o
psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o
#psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o
clean:
/bin/rm -f $(OBJS)
/bin/rm -f $(OBJS) *.smod
.c.o:
$(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@

@ -0,0 +1,47 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_allocate_mnnz_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_allocate_mnnz(m, n, a, nz)
implicit none
integer(psb_ipk_), intent(in) :: m, n
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(in), optional :: nz
integer(psb_ipk_) :: info
integer(psb_ipk_) :: err_act, nz_
character(len=20) :: name='allocate_mnnz'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(nz)) then
nz_ = nz
else
nz_ = 10
end if
call a%psb_c_ell_sparse_mat%allocate(m, n, nz_)
if (.not.allocated(a%val)) then
allocate(a%val(m, nz_))
allocate(a%ja(m, nz_))
allocate(a%irn(m))
allocate(a%idiag(m))
end if
a%val = czero
a%ja = -1
a%irn = 0
a%idiag = 0
call a%set_dev()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_ell_allocate_mnnz
end submodule psb_c_oacc_ell_allocate_mnnz_impl

@ -0,0 +1,78 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_cp_from_coo(a, b, info)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
complex(psb_spk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 1
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(a%get_nrows(), nz_per_row))
allocate(a%ja(a%get_nrows(), nz_per_row))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
end if
a%val = czero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row, row_counts(row) + 1) = value
a%ja(row, row_counts(row) + 1) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(i, j) == i) then
a%idiag(i) = j
exit
end if
end do
end do
deallocate(row_counts)
call a%set_dev()
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_c_oacc_ell_cp_from_coo
end submodule psb_c_oacc_ell_cp_from_coo_impl

@ -0,0 +1,24 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_cp_from_fmt_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_cp_from_fmt(a, b, info)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
end select
end subroutine psb_c_oacc_ell_cp_from_fmt
end submodule psb_c_oacc_ell_cp_from_fmt_impl

@ -0,0 +1,86 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_csmm_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
complex(psb_spk_), intent(in) :: x(:,:)
complex(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'c_oacc_ell_csmm'
logical, parameter :: debug = .false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
if (tra) then
call a%psb_c_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
nxy = min(size(x,2), size(y,2))
nzt = a%nzt
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, m
y(i,j) = beta * y(i,j)
end do
end do
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, n
do k = 1, nzt
y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
end do
end do
end do
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_ell_csmm
end submodule psb_c_oacc_ell_csmm_impl

@ -0,0 +1,82 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_csmv_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
complex(psb_spk_), intent(in) :: x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i, j, m, n, nzt
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'c_oacc_ell_csmv'
logical, parameter :: debug = .false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
if (tra) then
call a%psb_c_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
nzt = a%nzt
!$acc parallel loop present(a, x, y)
do i = 1, m
y(i) = beta * y(i)
end do
!$acc parallel loop present(a, x, y)
do i = 1, m
do j = 1, nzt
y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j))
end do
end do
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_ell_csmv
end submodule psb_c_oacc_ell_csmv_impl

@ -0,0 +1,85 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_inner_vect_sv_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
complex(psb_spk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'c_oacc_ell_inner_vect_sv'
logical, parameter :: debug = .false.
integer(psb_ipk_) :: i, j, nzt
call psb_get_erraction(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra .or. (beta /= dzero)) then
call x%sync()
call y%sync()
call a%psb_c_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
select type (xx => x)
type is (psb_c_vect_oacc)
select type(yy => y)
type is (psb_c_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
nzt = a%nzt
!$acc parallel loop present(a, xx, yy)
do i = 1, size(a%val, 1)
do j = 1, nzt
yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i)
end do
end do
call yy%set_dev()
class default
rx = xx%get_vect()
ry = y%get_vect()
call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
call y%bld(ry)
end select
class default
rx = x%get_vect()
ry = y%get_vect()
call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
call y%bld(ry)
end select
endif
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info, name, a_err = 'ell_vect_sv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_ell_inner_vect_sv
end submodule psb_c_oacc_ell_inner_vect_sv_impl

@ -0,0 +1,34 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mold_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_mold(a, b, info)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'ell_mold'
logical, parameter :: debug = .false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b, stat=info)
end if
if (info == 0) allocate(psb_c_oacc_ell_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_ell_mold
end submodule psb_c_oacc_ell_mold_impl

@ -0,0 +1,25 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_coo_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_mv_from_coo(a, b, info)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag)
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_c_oacc_ell_mv_from_coo
end submodule psb_c_oacc_ell_mv_from_coo_impl

@ -0,0 +1,24 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_fmt_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_mv_from_fmt(a, b, info)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
end select
end subroutine psb_c_oacc_ell_mv_from_fmt
end submodule psb_c_oacc_ell_mv_from_fmt_impl

@ -0,0 +1,28 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_reallocate_nz_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_reallocate_nz(nz, a)
implicit none
integer(psb_ipk_), intent(in) :: nz
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='c_oacc_ell_reallocate_nz'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
call a%psb_c_ell_sparse_mat%reallocate(nz)
call a%set_dev()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_ell_reallocate_nz
end submodule psb_c_oacc_ell_reallocate_nz_impl

@ -0,0 +1,58 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scal_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_scal(d, a, info, side)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
complex(psb_spk_), intent(in) :: d(:)
integer(psb_ipk_), intent(out) :: info
character, intent(in), optional :: side
integer(psb_ipk_) :: err_act
character(len=20) :: name='scal'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: i, j, m, nzt
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_host()) call a%sync()
m = a%get_nrows()
nzt = a%nzt
if (present(side)) then
if (side == 'L') then
!$acc parallel loop collapse(2) present(a, d)
do i = 1, m
do j = 1, nzt
a%val(i, j) = a%val(i, j) * d(i)
end do
end do
else if (side == 'R') then
!$acc parallel loop collapse(2) present(a, d)
do i = 1, m
do j = 1, nzt
a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
end do
end do
end if
else
!$acc parallel loop collapse(2) present(a, d)
do i = 1, m
do j = 1, nzt
a%val(i, j) = a%val(i, j) * d(j)
end do
end do
end if
call a%set_dev()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_ell_scal
end submodule psb_c_oacc_ell_scal_impl

@ -0,0 +1,39 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scals_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_scals(d, a, info)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
complex(psb_spk_), intent(in) :: d
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='scal'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: i, j, nzt, m
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_host()) call a%sync()
m = a%get_nrows()
nzt = a%nzt
!$acc parallel loop collapse(2) present(a)
do i = 1, m
do j = 1, nzt
a%val(i, j) = a%val(i, j) * d
end do
end do
call a%set_dev()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_ell_scals
end submodule psb_c_oacc_ell_scals_impl

@ -0,0 +1,66 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_vect_mv_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
implicit none
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
class(psb_c_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nzt
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
nzt = a%nzt
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
if (a%is_host()) call a%sync()
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
call y%set_dev()
contains
subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
implicit none
integer(psb_ipk_) :: m, n, nzt
complex(psb_spk_), intent(in) :: alpha, beta
complex(psb_spk_) :: val(:,:), x(:), y(:)
integer(psb_ipk_) :: ja(:,:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, ii, isz
complex(psb_spk_) :: tmp
integer(psb_ipk_), parameter :: vsz = 256
info = 0
!$acc parallel loop vector_length(vsz) private(isz)
do ii = 1, m, vsz
isz = min(vsz, m - ii + 1)
!$acc loop independent private(tmp)
do i = ii, ii + isz - 1
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = 1, nzt
if (ja(i,j) > 0) then
tmp = tmp + val(i,j) * x(ja(i,j))
end if
end do
y(i) = alpha * tmp + beta * y(i)
end do
end do
end subroutine inner_spmv
end subroutine psb_c_oacc_ell_vect_mv
end submodule psb_c_oacc_ell_vect_mv_impl

@ -0,0 +1,53 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_allocate_mnnz_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_allocate_mnnz(m, n, a, nz)
implicit none
integer(psb_ipk_), intent(in) :: m, n
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(in), optional :: nz
integer(psb_ipk_) :: info
integer(psb_ipk_) :: err_act, nz_
character(len=20) :: name='allocate_mnnz'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: hksz, nhacks
call psb_erractionsave(err_act)
info = psb_success_
if (present(nz)) then
nz_ = nz
else
nz_ = 10
end if
call a%psb_c_hll_sparse_mat%allocate(m, n, nz_)
hksz = a%hksz
nhacks = (m + hksz - 1) / hksz
if (.not.allocated(a%val)) then
allocate(a%val(nz_ * m))
allocate(a%ja(nz_ * m))
allocate(a%irn(m))
allocate(a%idiag(m))
allocate(a%hkoffs(nhacks))
end if
a%val = czero
a%ja = -1
a%irn = 0
a%idiag = 0
a%hkoffs = 0
call a%set_dev()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_hll_allocate_mnnz
end submodule psb_c_oacc_hll_allocate_mnnz_impl

@ -0,0 +1,85 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
complex(psb_spk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 32 ! Assuming a default hack size of 32
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(nz_per_row * a%get_nrows()))
allocate(a%ja(nz_per_row * a%get_nrows()))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
end if
a%val = czero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(j + (i - 1) * nz_per_row) == i) then
a%idiag(i) = j
exit
end if
end do
end do
! Calculate hkoffs for HLL format
!$acc parallel loop present(a)
do i = 1, size(a%hkoffs)
a%hkoffs(i) = (i - 1) * hacksize
end do
deallocate(row_counts)
call a%set_dev()
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_c_oacc_hll_cp_from_coo
end submodule psb_c_oacc_hll_cp_from_coo_impl

@ -0,0 +1,24 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_fmt_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_cp_from_fmt(a, b, info)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
end select
end subroutine psb_c_oacc_hll_cp_from_fmt
end submodule psb_c_oacc_hll_cp_from_fmt_impl

@ -0,0 +1,86 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmm_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
complex(psb_spk_), intent(in) :: x(:,:)
complex(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'c_oacc_hll_csmm'
logical, parameter :: debug = .false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
if (tra) then
call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
nxy = min(size(x,2), size(y,2))
nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, m
y(i,j) = beta * y(i,j)
end do
end do
!$acc parallel loop present(a, x, y)
do j = 1, nxy
do k = 1, nhacks
do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
end do
end do
end do
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_hll_csmm
end submodule psb_c_oacc_hll_csmm_impl

@ -0,0 +1,84 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmv_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
complex(psb_spk_), intent(in) :: x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'c_oacc_hll_csmv'
logical, parameter :: debug = .false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
if (tra) then
call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
hksz = a%hksz
nhacks = (a%get_nrows() + hksz - 1) / hksz
!$acc parallel loop present(a, x, y)
do i = 1, m
y(i) = beta * y(i)
end do
! This loop nest cannot be run with collapse, since
! the inner loop extent varies.
!$acc parallel loop present(a, x, y)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
end do
end do
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_hll_csmv
end submodule psb_c_oacc_hll_csmv_impl

@ -0,0 +1,86 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_inner_vect_sv_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
complex(psb_spk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'c_oacc_hll_inner_vect_sv'
logical, parameter :: debug = .false.
integer(psb_ipk_) :: i, j, nhacks, hksz
call psb_get_erraction(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra .or. (beta /= dzero)) then
call x%sync()
call y%sync()
call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
select type (xx => x)
type is (psb_c_vect_oacc)
select type(yy => y)
type is (psb_c_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
nhacks = size(a%hkoffs) - 1
hksz = a%hksz
!$acc parallel loop present(a, xx, yy)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i+1) - 1
yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j))
end do
end do
call yy%set_dev()
class default
rx = xx%get_vect()
ry = y%get_vect()
call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
call y%bld(ry)
end select
class default
rx = x%get_vect()
ry = y%get_vect()
call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
call y%bld(ry)
end select
endif
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info, name, a_err = 'hll_vect_sv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_hll_inner_vect_sv
end submodule psb_c_oacc_hll_inner_vect_sv_impl

@ -0,0 +1,34 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mold_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_mold(a, b, info)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'hll_mold'
logical, parameter :: debug = .false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b, stat=info)
end if
if (info == 0) allocate(psb_c_oacc_hll_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_hll_mold
end submodule psb_c_oacc_hll_mold_impl

@ -0,0 +1,25 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_coo_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_mv_from_coo(a, b, info)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_c_oacc_hll_mv_from_coo
end submodule psb_c_oacc_hll_mv_from_coo_impl

@ -0,0 +1,24 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_fmt_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_mv_from_fmt(a, b, info)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
end select
end subroutine psb_c_oacc_hll_mv_from_fmt
end submodule psb_c_oacc_hll_mv_from_fmt_impl

@ -0,0 +1,29 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_reallocate_nz_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_reallocate_nz(nz, a)
implicit none
integer(psb_ipk_), intent(in) :: nz
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='c_oacc_hll_reallocate_nz'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: hksz, nhacks
call psb_erractionsave(err_act)
info = psb_success_
call a%psb_c_hll_sparse_mat%reallocate(nz)
call a%set_dev()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_hll_reallocate_nz
end submodule psb_c_oacc_hll_reallocate_nz_impl

@ -0,0 +1,62 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scal_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_scal(d, a, info, side)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
complex(psb_spk_), intent(in) :: d(:)
integer(psb_ipk_), intent(out) :: info
character, intent(in), optional :: side
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'scal'
integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_host()) call a%sync()
hksz = a%hksz
nhacks = (a%get_nrows() + hksz - 1) / hksz
nzt = a%nzt
if (present(side)) then
if (side == 'L') then
! $ a parallel loop collapse(2) present(a, d)
!$acc parallel loop present(a, d)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1
a%val(j) = a%val(j) * d(k)
end do
end do
else if (side == 'R') then
! $ a parallel loop collapse(2) present(a, d)
!$acc parallel loop present(a, d)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
a%val(j) = a%val(j) * d(a%ja(j))
end do
end do
end if
else
! $ a parallel loop collapse(2) present(a, d)
!$acc parallel loop present(a, d)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1)
end do
end do
end if
call a%set_dev()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_hll_scal
end submodule psb_c_oacc_hll_scal_impl

@ -0,0 +1,40 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scals_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_scals(d, a, info)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
complex(psb_spk_), intent(in) :: d
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'scal'
integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_host()) call a%sync()
hksz = a%hksz
nhacks = (a%get_nrows() + hksz - 1) / hksz
nzt = a%nzt
! $ a parallel loop collapse(2) present(a)
!$acc parallel loop present(a)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
a%val(j) = a%val(j) * d
end do
end do
call a%set_dev()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_oacc_hll_scals
end submodule psb_c_oacc_hll_scals_impl

@ -0,0 +1,67 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_vect_mv_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
implicit none
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
class(psb_c_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nhacks, hksz
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
nhacks = size(a%hkoffs) - 1
hksz = a%hksz
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
if (a%is_host()) call a%sync()
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
call y%set_dev()
contains
subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
implicit none
integer(psb_ipk_) :: m, nhacks, hksz
complex(psb_spk_), intent(in) :: alpha, beta
complex(psb_spk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, idx, k
complex(psb_spk_) :: tmp
info = 0
!$acc parallel loop present(val, ja, hkoffs, x, y)
do i = 1, nhacks
do k = 0, hksz - 1
idx = hkoffs(i) + k
if (idx <= hkoffs(i + 1) - 1) then
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
if (ja(j) > 0) then
tmp = tmp + val(j) * x(ja(j))
end if
end do
y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
end if
end do
end do
end subroutine inner_spmv
end subroutine psb_c_oacc_hll_vect_mv
end submodule psb_c_oacc_hll_vect_mv_impl

@ -29,7 +29,7 @@ contains
allocate(a%idiag(m))
end if
a%val = 0.0_psb_dpk_
a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0

@ -26,7 +26,7 @@ contains
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
end if
a%val = 0.0_psb_dpk_
a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0

@ -34,7 +34,7 @@ contains
allocate(a%hkoffs(nhacks))
end if
a%val = 0.0_psb_dpk_
a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0

@ -27,7 +27,7 @@ contains
allocate(a%idiag(a%get_nrows()))
allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
end if
a%val = 0.0_psb_dpk_
a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0

@ -0,0 +1,47 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_allocate_mnnz_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_allocate_mnnz(m, n, a, nz)
implicit none
integer(psb_ipk_), intent(in) :: m, n
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(in), optional :: nz
integer(psb_ipk_) :: info
integer(psb_ipk_) :: err_act, nz_
character(len=20) :: name='allocate_mnnz'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(nz)) then
nz_ = nz
else
nz_ = 10
end if
call a%psb_s_ell_sparse_mat%allocate(m, n, nz_)
if (.not.allocated(a%val)) then
allocate(a%val(m, nz_))
allocate(a%ja(m, nz_))
allocate(a%irn(m))
allocate(a%idiag(m))
end if
a%val = szero
a%ja = -1
a%irn = 0
a%idiag = 0
call a%set_dev()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_ell_allocate_mnnz
end submodule psb_s_oacc_ell_allocate_mnnz_impl

@ -0,0 +1,78 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_cp_from_coo(a, b, info)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
real(psb_spk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 1
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(a%get_nrows(), nz_per_row))
allocate(a%ja(a%get_nrows(), nz_per_row))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
end if
a%val = szero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row, row_counts(row) + 1) = value
a%ja(row, row_counts(row) + 1) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(i, j) == i) then
a%idiag(i) = j
exit
end if
end do
end do
deallocate(row_counts)
call a%set_dev()
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_s_oacc_ell_cp_from_coo
end submodule psb_s_oacc_ell_cp_from_coo_impl

@ -0,0 +1,24 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_fmt_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_cp_from_fmt(a, b, info)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
end select
end subroutine psb_s_oacc_ell_cp_from_fmt
end submodule psb_s_oacc_ell_cp_from_fmt_impl

@ -0,0 +1,86 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmm_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta
real(psb_spk_), intent(in) :: x(:,:)
real(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 's_oacc_ell_csmm'
logical, parameter :: debug = .false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
if (tra) then
call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
nxy = min(size(x,2), size(y,2))
nzt = a%nzt
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, m
y(i,j) = beta * y(i,j)
end do
end do
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, n
do k = 1, nzt
y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
end do
end do
end do
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_ell_csmm
end submodule psb_s_oacc_ell_csmm_impl

@ -0,0 +1,82 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmv_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta
real(psb_spk_), intent(in) :: x(:)
real(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i, j, m, n, nzt
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 's_oacc_ell_csmv'
logical, parameter :: debug = .false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
if (tra) then
call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
nzt = a%nzt
!$acc parallel loop present(a, x, y)
do i = 1, m
y(i) = beta * y(i)
end do
!$acc parallel loop present(a, x, y)
do i = 1, m
do j = 1, nzt
y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j))
end do
end do
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_ell_csmv
end submodule psb_s_oacc_ell_csmv_impl

@ -0,0 +1,85 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_inner_vect_sv_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta
class(psb_s_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
real(psb_spk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
integer(psb_ipk_) :: err_act
character(len=20) :: name = 's_oacc_ell_inner_vect_sv'
logical, parameter :: debug = .false.
integer(psb_ipk_) :: i, j, nzt
call psb_get_erraction(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra .or. (beta /= dzero)) then
call x%sync()
call y%sync()
call a%psb_s_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
select type (xx => x)
type is (psb_s_vect_oacc)
select type(yy => y)
type is (psb_s_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
nzt = a%nzt
!$acc parallel loop present(a, xx, yy)
do i = 1, size(a%val, 1)
do j = 1, nzt
yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i)
end do
end do
call yy%set_dev()
class default
rx = xx%get_vect()
ry = y%get_vect()
call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
call y%bld(ry)
end select
class default
rx = x%get_vect()
ry = y%get_vect()
call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
call y%bld(ry)
end select
endif
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info, name, a_err = 'ell_vect_sv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_ell_inner_vect_sv
end submodule psb_s_oacc_ell_inner_vect_sv_impl

@ -0,0 +1,34 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mold_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_mold(a, b, info)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'ell_mold'
logical, parameter :: debug = .false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b, stat=info)
end if
if (info == 0) allocate(psb_s_oacc_ell_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_ell_mold
end submodule psb_s_oacc_ell_mold_impl

@ -0,0 +1,25 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_coo_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_mv_from_coo(a, b, info)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag)
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_s_oacc_ell_mv_from_coo
end submodule psb_s_oacc_ell_mv_from_coo_impl

@ -0,0 +1,24 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_fmt_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_mv_from_fmt(a, b, info)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
end select
end subroutine psb_s_oacc_ell_mv_from_fmt
end submodule psb_s_oacc_ell_mv_from_fmt_impl

@ -0,0 +1,28 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_reallocate_nz_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_reallocate_nz(nz, a)
implicit none
integer(psb_ipk_), intent(in) :: nz
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='s_oacc_ell_reallocate_nz'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
call a%psb_s_ell_sparse_mat%reallocate(nz)
call a%set_dev()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_ell_reallocate_nz
end submodule psb_s_oacc_ell_reallocate_nz_impl

@ -0,0 +1,58 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scal_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_scal(d, a, info, side)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
real(psb_spk_), intent(in) :: d(:)
integer(psb_ipk_), intent(out) :: info
character, intent(in), optional :: side
integer(psb_ipk_) :: err_act
character(len=20) :: name='scal'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: i, j, m, nzt
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_host()) call a%sync()
m = a%get_nrows()
nzt = a%nzt
if (present(side)) then
if (side == 'L') then
!$acc parallel loop collapse(2) present(a, d)
do i = 1, m
do j = 1, nzt
a%val(i, j) = a%val(i, j) * d(i)
end do
end do
else if (side == 'R') then
!$acc parallel loop collapse(2) present(a, d)
do i = 1, m
do j = 1, nzt
a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
end do
end do
end if
else
!$acc parallel loop collapse(2) present(a, d)
do i = 1, m
do j = 1, nzt
a%val(i, j) = a%val(i, j) * d(j)
end do
end do
end if
call a%set_dev()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_ell_scal
end submodule psb_s_oacc_ell_scal_impl

@ -0,0 +1,39 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scals_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_scals(d, a, info)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
real(psb_spk_), intent(in) :: d
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='scal'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: i, j, nzt, m
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_host()) call a%sync()
m = a%get_nrows()
nzt = a%nzt
!$acc parallel loop collapse(2) present(a)
do i = 1, m
do j = 1, nzt
a%val(i, j) = a%val(i, j) * d
end do
end do
call a%set_dev()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_ell_scals
end submodule psb_s_oacc_ell_scals_impl

@ -0,0 +1,66 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_vect_mv_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
implicit none
real(psb_spk_), intent(in) :: alpha, beta
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
class(psb_s_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nzt
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
nzt = a%nzt
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
if (a%is_host()) call a%sync()
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
call y%set_dev()
contains
subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
implicit none
integer(psb_ipk_) :: m, n, nzt
real(psb_spk_), intent(in) :: alpha, beta
real(psb_spk_) :: val(:,:), x(:), y(:)
integer(psb_ipk_) :: ja(:,:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, ii, isz
real(psb_spk_) :: tmp
integer(psb_ipk_), parameter :: vsz = 256
info = 0
!$acc parallel loop vector_length(vsz) private(isz)
do ii = 1, m, vsz
isz = min(vsz, m - ii + 1)
!$acc loop independent private(tmp)
do i = ii, ii + isz - 1
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = 1, nzt
if (ja(i,j) > 0) then
tmp = tmp + val(i,j) * x(ja(i,j))
end if
end do
y(i) = alpha * tmp + beta * y(i)
end do
end do
end subroutine inner_spmv
end subroutine psb_s_oacc_ell_vect_mv
end submodule psb_s_oacc_ell_vect_mv_impl

@ -0,0 +1,53 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_allocate_mnnz_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_allocate_mnnz(m, n, a, nz)
implicit none
integer(psb_ipk_), intent(in) :: m, n
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(in), optional :: nz
integer(psb_ipk_) :: info
integer(psb_ipk_) :: err_act, nz_
character(len=20) :: name='allocate_mnnz'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: hksz, nhacks
call psb_erractionsave(err_act)
info = psb_success_
if (present(nz)) then
nz_ = nz
else
nz_ = 10
end if
call a%psb_s_hll_sparse_mat%allocate(m, n, nz_)
hksz = a%hksz
nhacks = (m + hksz - 1) / hksz
if (.not.allocated(a%val)) then
allocate(a%val(nz_ * m))
allocate(a%ja(nz_ * m))
allocate(a%irn(m))
allocate(a%idiag(m))
allocate(a%hkoffs(nhacks))
end if
a%val = szero
a%ja = -1
a%irn = 0
a%idiag = 0
a%hkoffs = 0
call a%set_dev()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_hll_allocate_mnnz
end submodule psb_s_oacc_hll_allocate_mnnz_impl

@ -0,0 +1,85 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_cp_from_coo(a, b, info)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
real(psb_spk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 32 ! Assuming a default hack size of 32
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(nz_per_row * a%get_nrows()))
allocate(a%ja(nz_per_row * a%get_nrows()))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
end if
a%val = szero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(j + (i - 1) * nz_per_row) == i) then
a%idiag(i) = j
exit
end if
end do
end do
! Calculate hkoffs for HLL format
!$acc parallel loop present(a)
do i = 1, size(a%hkoffs)
a%hkoffs(i) = (i - 1) * hacksize
end do
deallocate(row_counts)
call a%set_dev()
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_s_oacc_hll_cp_from_coo
end submodule psb_s_oacc_hll_cp_from_coo_impl

@ -0,0 +1,24 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_fmt_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_cp_from_fmt(a, b, info)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
end select
end subroutine psb_s_oacc_hll_cp_from_fmt
end submodule psb_s_oacc_hll_cp_from_fmt_impl

@ -0,0 +1,86 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmm_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta
real(psb_spk_), intent(in) :: x(:,:)
real(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 's_oacc_hll_csmm'
logical, parameter :: debug = .false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
if (tra) then
call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
nxy = min(size(x,2), size(y,2))
nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, m
y(i,j) = beta * y(i,j)
end do
end do
!$acc parallel loop present(a, x, y)
do j = 1, nxy
do k = 1, nhacks
do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
end do
end do
end do
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_hll_csmm
end submodule psb_s_oacc_hll_csmm_impl

@ -0,0 +1,84 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmv_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta
real(psb_spk_), intent(in) :: x(:)
real(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 's_oacc_hll_csmv'
logical, parameter :: debug = .false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
if (tra) then
call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
hksz = a%hksz
nhacks = (a%get_nrows() + hksz - 1) / hksz
!$acc parallel loop present(a, x, y)
do i = 1, m
y(i) = beta * y(i)
end do
! This loop nest cannot be run with collapse, since
! the inner loop extent varies.
!$acc parallel loop present(a, x, y)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
end do
end do
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_hll_csmv
end submodule psb_s_oacc_hll_csmv_impl

@ -0,0 +1,86 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_inner_vect_sv_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta
class(psb_s_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
real(psb_spk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
integer(psb_ipk_) :: err_act
character(len=20) :: name = 's_oacc_hll_inner_vect_sv'
logical, parameter :: debug = .false.
integer(psb_ipk_) :: i, j, nhacks, hksz
call psb_get_erraction(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra .or. (beta /= dzero)) then
call x%sync()
call y%sync()
call a%psb_s_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
select type (xx => x)
type is (psb_s_vect_oacc)
select type(yy => y)
type is (psb_s_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
nhacks = size(a%hkoffs) - 1
hksz = a%hksz
!$acc parallel loop present(a, xx, yy)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i+1) - 1
yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j))
end do
end do
call yy%set_dev()
class default
rx = xx%get_vect()
ry = y%get_vect()
call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
call y%bld(ry)
end select
class default
rx = x%get_vect()
ry = y%get_vect()
call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
call y%bld(ry)
end select
endif
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info, name, a_err = 'hll_vect_sv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_hll_inner_vect_sv
end submodule psb_s_oacc_hll_inner_vect_sv_impl

@ -0,0 +1,34 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mold_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_mold(a, b, info)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'hll_mold'
logical, parameter :: debug = .false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b, stat=info)
end if
if (info == 0) allocate(psb_s_oacc_hll_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_hll_mold
end submodule psb_s_oacc_hll_mold_impl

@ -0,0 +1,25 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_coo_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_mv_from_coo(a, b, info)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_s_oacc_hll_mv_from_coo
end submodule psb_s_oacc_hll_mv_from_coo_impl

@ -0,0 +1,24 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_fmt_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_mv_from_fmt(a, b, info)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
end select
end subroutine psb_s_oacc_hll_mv_from_fmt
end submodule psb_s_oacc_hll_mv_from_fmt_impl

@ -0,0 +1,29 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_reallocate_nz_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_reallocate_nz(nz, a)
implicit none
integer(psb_ipk_), intent(in) :: nz
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='s_oacc_hll_reallocate_nz'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: hksz, nhacks
call psb_erractionsave(err_act)
info = psb_success_
call a%psb_s_hll_sparse_mat%reallocate(nz)
call a%set_dev()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_hll_reallocate_nz
end submodule psb_s_oacc_hll_reallocate_nz_impl

@ -0,0 +1,62 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scal_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_scal(d, a, info, side)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
real(psb_spk_), intent(in) :: d(:)
integer(psb_ipk_), intent(out) :: info
character, intent(in), optional :: side
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'scal'
integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_host()) call a%sync()
hksz = a%hksz
nhacks = (a%get_nrows() + hksz - 1) / hksz
nzt = a%nzt
if (present(side)) then
if (side == 'L') then
! $ a parallel loop collapse(2) present(a, d)
!$acc parallel loop present(a, d)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1
a%val(j) = a%val(j) * d(k)
end do
end do
else if (side == 'R') then
! $ a parallel loop collapse(2) present(a, d)
!$acc parallel loop present(a, d)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
a%val(j) = a%val(j) * d(a%ja(j))
end do
end do
end if
else
! $ a parallel loop collapse(2) present(a, d)
!$acc parallel loop present(a, d)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1)
end do
end do
end if
call a%set_dev()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_hll_scal
end submodule psb_s_oacc_hll_scal_impl

@ -0,0 +1,40 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scals_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_scals(d, a, info)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
real(psb_spk_), intent(in) :: d
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'scal'
integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
info = psb_success_
call psb_erractionsave(err_act)
if (a%is_host()) call a%sync()
hksz = a%hksz
nhacks = (a%get_nrows() + hksz - 1) / hksz
nzt = a%nzt
! $ a parallel loop collapse(2) present(a)
!$acc parallel loop present(a)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
a%val(j) = a%val(j) * d
end do
end do
call a%set_dev()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_oacc_hll_scals
end submodule psb_s_oacc_hll_scals_impl

@ -0,0 +1,67 @@
submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_vect_mv_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
implicit none
real(psb_spk_), intent(in) :: alpha, beta
class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
class(psb_s_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nhacks, hksz
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
nhacks = size(a%hkoffs) - 1
hksz = a%hksz
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
if (a%is_host()) call a%sync()
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
call y%set_dev()
contains
subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
implicit none
integer(psb_ipk_) :: m, nhacks, hksz
real(psb_spk_), intent(in) :: alpha, beta
real(psb_spk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, idx, k
real(psb_spk_) :: tmp
info = 0
!$acc parallel loop present(val, ja, hkoffs, x, y)
do i = 1, nhacks
do k = 0, hksz - 1
idx = hkoffs(i) + k
if (idx <= hkoffs(i + 1) - 1) then
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
if (ja(j) > 0) then
tmp = tmp + val(j) * x(ja(j))
end if
end do
y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
end if
end do
end do
end subroutine inner_spmv
end subroutine psb_s_oacc_hll_vect_mv
end submodule psb_s_oacc_hll_vect_mv_impl

@ -0,0 +1,47 @@
submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_allocate_mnnz_impl
use psb_base_mod
contains
module subroutine psb_z_oacc_ell_allocate_mnnz(m, n, a, nz)
implicit none
integer(psb_ipk_), intent(in) :: m, n
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(in), optional :: nz
integer(psb_ipk_) :: info
integer(psb_ipk_) :: err_act, nz_
character(len=20) :: name='allocate_mnnz'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(nz)) then
nz_ = nz
else
nz_ = 10
end if
call a%psb_z_ell_sparse_mat%allocate(m, n, nz_)
if (.not.allocated(a%val)) then
allocate(a%val(m, nz_))
allocate(a%ja(m, nz_))
allocate(a%irn(m))
allocate(a%idiag(m))
end if
a%val = zzero
a%ja = -1
a%irn = 0
a%idiag = 0
call a%set_dev()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_oacc_ell_allocate_mnnz
end submodule psb_z_oacc_ell_allocate_mnnz_impl

@ -0,0 +1,78 @@
submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_z_oacc_ell_cp_from_coo(a, b, info)
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
complex(psb_dpk_) :: value
integer(psb_ipk_), allocatable :: row_counts(:)
integer(psb_ipk_) :: hacksize, nza
info = psb_success_
hacksize = 1
call a%set_nrows(b%get_nrows())
call a%set_ncols(b%get_ncols())
nz_per_row = a%nzt
if (.not.allocated(a%val)) then
allocate(a%val(a%get_nrows(), nz_per_row))
allocate(a%ja(a%get_nrows(), nz_per_row))
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
end if
a%val = zzero
a%ja = -1
a%irn = 0
a%idiag = 0
allocate(row_counts(a%get_nrows()))
row_counts = 0
nza = b%get_nzeros()
!$acc parallel loop present(b, a, row_counts)
do k = 1, nza
row = b%ia(k)
col = b%ja(k)
value = b%val(k)
if (row_counts(row) < nz_per_row) then
a%val(row, row_counts(row) + 1) = value
a%ja(row, row_counts(row) + 1) = col
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
!goto 9999
end if
end do
a%irn = row_counts
!$acc parallel loop present(a)
do i = 1, a%get_nrows()
do j = 1, nz_per_row
if (a%ja(i, j) == i) then
a%idiag(i) = j
exit
end if
end do
end do
deallocate(row_counts)
call a%set_dev()
if (info /= 0) goto 9999
return
9999 continue
info = psb_err_alloc_dealloc_
return
end subroutine psb_z_oacc_ell_cp_from_coo
end submodule psb_z_oacc_ell_cp_from_coo_impl

@ -0,0 +1,24 @@
submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_fmt_impl
use psb_base_mod
contains
module subroutine psb_z_oacc_ell_cp_from_fmt(a, b, info)
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
select type(b)
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
!$acc update device(a%val, a%ja, a%irn, a%idiag)
end select
end subroutine psb_z_oacc_ell_cp_from_fmt
end submodule psb_z_oacc_ell_cp_from_fmt_impl

@ -0,0 +1,86 @@
submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmm_impl
use psb_base_mod
contains
module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta
complex(psb_dpk_), intent(in) :: x(:,:)
complex(psb_dpk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'z_oacc_ell_csmm'
logical, parameter :: debug = .false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
if (tra) then
call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
nxy = min(size(x,2), size(y,2))
nzt = a%nzt
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, m
y(i,j) = beta * y(i,j)
end do
end do
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, n
do k = 1, nzt
y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
end do
end do
end do
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_oacc_ell_csmm
end submodule psb_z_oacc_ell_csmm_impl

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save