Merge branch 'development' into repackage

repack-newsolve
sfilippone 4 months ago
commit a8dcba2964

@ -99,8 +99,8 @@ module psi_c_serial_mod
end subroutine psi_caxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_upd_xyz
subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_c_serial_mod
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_cabgdxyz
end interface psi_abgdxyz
end subroutine psi_c_upd_xyz
end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_d_serial_mod
end subroutine psi_daxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_upd_xyz
subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_d_serial_mod
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_dabgdxyz
end interface psi_abgdxyz
end subroutine psi_d_upd_xyz
end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_e_serial_mod
end subroutine psi_eaxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_upd_xyz
subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_e_serial_mod
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_eabgdxyz
end interface psi_abgdxyz
end subroutine psi_e_upd_xyz
end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_i2_serial_mod
end subroutine psi_i2axpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_upd_xyz
subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_i2_serial_mod
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_i2abgdxyz
end interface psi_abgdxyz
end subroutine psi_i2_upd_xyz
end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_m_serial_mod
end subroutine psi_maxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_upd_xyz
subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_m_serial_mod
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_mabgdxyz
end interface psi_abgdxyz
end subroutine psi_m_upd_xyz
end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_s_serial_mod
end subroutine psi_saxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_upd_xyz
subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_s_serial_mod
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_sabgdxyz
end interface psi_abgdxyz
end subroutine psi_s_upd_xyz
end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -99,8 +99,8 @@ module psi_z_serial_mod
end subroutine psi_zaxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
interface psi_upd_xyz
subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
@ -109,8 +109,8 @@ module psi_z_serial_mod
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_zabgdxyz
end interface psi_abgdxyz
end subroutine psi_z_upd_xyz
end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)

@ -143,8 +143,8 @@ module psb_c_psblas_mod
end subroutine psb_caxpby
end interface
interface psb_abgdxyz
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
interface psb_upd_xyz
subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_c_vect_type, psb_cspmat_type
@ -154,8 +154,8 @@ module psb_c_psblas_mod
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_cabgdxyz_vect
end interface psb_abgdxyz
end subroutine psb_c_upd_xyz_vect
end interface psb_upd_xyz
interface psb_geamax
function psb_camax(x, desc_a, info, jx,global)

@ -143,8 +143,8 @@ module psb_d_psblas_mod
end subroutine psb_daxpby
end interface
interface psb_abgdxyz
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
interface psb_upd_xyz
subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_d_vect_type, psb_dspmat_type
@ -154,8 +154,8 @@ module psb_d_psblas_mod
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_dabgdxyz_vect
end interface psb_abgdxyz
end subroutine psb_d_upd_xyz_vect
end interface psb_upd_xyz
interface psb_geamax
function psb_damax(x, desc_a, info, jx,global)

@ -143,8 +143,8 @@ module psb_s_psblas_mod
end subroutine psb_saxpby
end interface
interface psb_abgdxyz
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
interface psb_upd_xyz
subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_s_vect_type, psb_sspmat_type
@ -154,8 +154,8 @@ module psb_s_psblas_mod
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_sabgdxyz_vect
end interface psb_abgdxyz
end subroutine psb_s_upd_xyz_vect
end interface psb_upd_xyz
interface psb_geamax
function psb_samax(x, desc_a, info, jx,global)

@ -143,8 +143,8 @@ module psb_z_psblas_mod
end subroutine psb_zaxpby
end interface
interface psb_abgdxyz
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
interface psb_upd_xyz
subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_z_vect_type, psb_zspmat_type
@ -154,8 +154,8 @@ module psb_z_psblas_mod
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_zabgdxyz_vect
end interface psb_abgdxyz
end subroutine psb_z_upd_xyz_vect
end interface psb_upd_xyz
interface psb_geamax
function psb_zamax(x, desc_a, info, jx,global)

@ -155,7 +155,7 @@ module psb_c_base_vect_mod
procedure, pass(z) :: axpby_v2 => c_base_axpby_v2
procedure, pass(z) :: axpby_a2 => c_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => c_base_abgdxyz
procedure, pass(z) :: upd_xyz => c_base_upd_xyz
procedure, pass(w) :: xyzw => c_base_xyzw
!
@ -1130,12 +1130,12 @@ contains
end subroutine c_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
! UPD_XYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!> Function base_upd_xyz
!! \memberof psb_c_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@ -1146,7 +1146,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -1159,11 +1159,11 @@ contains
if (x%is_dev().and.(alpha/=czero)) call x%sync()
if (y%is_dev().and.(beta/=czero)) call y%sync()
if (z%is_dev().and.(delta/=czero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine c_base_abgdxyz
end subroutine c_base_upd_xyz
subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod

@ -102,9 +102,8 @@ module psb_c_vect_mod
procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => c_vect_abgdxyz
procedure, pass(z) :: upd_xyz => c_vect_upd_xyz
procedure, pass(z) :: xyzw => c_vect_xyzw
procedure, pass(y) :: mlt_v => c_vect_mlt_v
procedure, pass(y) :: mlt_a => c_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2
@ -774,7 +773,7 @@ contains
end subroutine c_vect_axpby_a2
subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -785,9 +784,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine c_vect_abgdxyz
end subroutine c_vect_upd_xyz
subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod

@ -155,7 +155,7 @@ module psb_d_base_vect_mod
procedure, pass(z) :: axpby_v2 => d_base_axpby_v2
procedure, pass(z) :: axpby_a2 => d_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => d_base_abgdxyz
procedure, pass(z) :: upd_xyz => d_base_upd_xyz
procedure, pass(w) :: xyzw => d_base_xyzw
!
@ -1137,12 +1137,12 @@ contains
end subroutine d_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
! UPD_XYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!> Function base_upd_xyz
!! \memberof psb_d_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@ -1153,7 +1153,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -1166,11 +1166,11 @@ contains
if (x%is_dev().and.(alpha/=dzero)) call x%sync()
if (y%is_dev().and.(beta/=dzero)) call y%sync()
if (z%is_dev().and.(delta/=dzero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine d_base_abgdxyz
end subroutine d_base_upd_xyz
subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod

@ -102,9 +102,8 @@ module psb_d_vect_mod
procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => d_vect_abgdxyz
procedure, pass(z) :: upd_xyz => d_vect_upd_xyz
procedure, pass(z) :: xyzw => d_vect_xyzw
procedure, pass(y) :: mlt_v => d_vect_mlt_v
procedure, pass(y) :: mlt_a => d_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2
@ -781,7 +780,7 @@ contains
end subroutine d_vect_axpby_a2
subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -792,9 +791,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine d_vect_abgdxyz
end subroutine d_vect_upd_xyz
subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod

@ -155,7 +155,7 @@ module psb_s_base_vect_mod
procedure, pass(z) :: axpby_v2 => s_base_axpby_v2
procedure, pass(z) :: axpby_a2 => s_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => s_base_abgdxyz
procedure, pass(z) :: upd_xyz => s_base_upd_xyz
procedure, pass(w) :: xyzw => s_base_xyzw
!
@ -1137,12 +1137,12 @@ contains
end subroutine s_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
! UPD_XYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!> Function base_upd_xyz
!! \memberof psb_s_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@ -1153,7 +1153,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -1166,11 +1166,11 @@ contains
if (x%is_dev().and.(alpha/=szero)) call x%sync()
if (y%is_dev().and.(beta/=szero)) call y%sync()
if (z%is_dev().and.(delta/=szero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine s_base_abgdxyz
end subroutine s_base_upd_xyz
subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod

@ -102,9 +102,8 @@ module psb_s_vect_mod
procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => s_vect_abgdxyz
procedure, pass(z) :: upd_xyz => s_vect_upd_xyz
procedure, pass(z) :: xyzw => s_vect_xyzw
procedure, pass(y) :: mlt_v => s_vect_mlt_v
procedure, pass(y) :: mlt_a => s_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2
@ -781,7 +780,7 @@ contains
end subroutine s_vect_axpby_a2
subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -792,9 +791,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine s_vect_abgdxyz
end subroutine s_vect_upd_xyz
subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod

@ -155,7 +155,7 @@ module psb_z_base_vect_mod
procedure, pass(z) :: axpby_v2 => z_base_axpby_v2
procedure, pass(z) :: axpby_a2 => z_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => z_base_abgdxyz
procedure, pass(z) :: upd_xyz => z_base_upd_xyz
procedure, pass(w) :: xyzw => z_base_xyzw
!
@ -1130,12 +1130,12 @@ contains
end subroutine z_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
! UPD_XYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!> Function base_upd_xyz
!! \memberof psb_z_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@ -1146,7 +1146,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -1159,11 +1159,11 @@ contains
if (x%is_dev().and.(alpha/=zzero)) call x%sync()
if (y%is_dev().and.(beta/=zzero)) call y%sync()
if (z%is_dev().and.(delta/=zzero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine z_base_abgdxyz
end subroutine z_base_upd_xyz
subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod

@ -102,9 +102,8 @@ module psb_z_vect_mod
procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => z_vect_abgdxyz
procedure, pass(z) :: upd_xyz => z_vect_upd_xyz
procedure, pass(z) :: xyzw => z_vect_xyzw
procedure, pass(y) :: mlt_v => z_vect_mlt_v
procedure, pass(y) :: mlt_a => z_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2
@ -774,7 +773,7 @@ contains
end subroutine z_vect_axpby_a2
subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -785,9 +784,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine z_vect_abgdxyz
end subroutine z_vect_upd_xyz
subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod

@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info)
end subroutine psb_caddconst_vect
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect
use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect
implicit none
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
@ -812,7 +812,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@ -822,5 +822,5 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
end subroutine psb_cabgdxyz_vect
end subroutine psb_c_upd_xyz_vect

@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info)
end subroutine psb_daddconst_vect
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect
use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect
implicit none
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
@ -812,7 +812,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@ -822,5 +822,5 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
end subroutine psb_dabgdxyz_vect
end subroutine psb_d_upd_xyz_vect

@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info)
end subroutine psb_saddconst_vect
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect
use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect
implicit none
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
@ -812,7 +812,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@ -822,5 +822,5 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
end subroutine psb_sabgdxyz_vect
end subroutine psb_s_upd_xyz_vect

@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info)
end subroutine psb_zaddconst_vect
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect
use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect
implicit none
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
@ -812,7 +812,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@ -822,5 +822,5 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
end subroutine psb_zabgdxyz_vect
end subroutine psb_z_upd_xyz_vect

@ -1568,7 +1568,7 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine caxpbyv2
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='cabgdxyz'
name='c_upd_xyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_cabgdxyz
end subroutine psi_c_upd_xyz
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='cabgdxyz'
name='c_xyzw'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine daxpbyv2
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='dabgdxyz'
name='d_upd_xyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_dabgdxyz
end subroutine psi_d_upd_xyz
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='dabgdxyz'
name='d_xyzw'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine eaxpbyv2
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='eabgdxyz'
name='e_upd_xyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_eabgdxyz
end subroutine psi_e_upd_xyz
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='eabgdxyz'
name='e_xyzw'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine i2axpbyv2
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='i2abgdxyz'
name='i2_upd_xyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_i2abgdxyz
end subroutine psi_i2_upd_xyz
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='i2abgdxyz'
name='i2_xyzw'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine maxpbyv2
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='mabgdxyz'
name='m_upd_xyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_mabgdxyz
end subroutine psi_m_upd_xyz
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='mabgdxyz'
name='m_xyzw'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine saxpbyv2
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='sabgdxyz'
name='s_upd_xyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_sabgdxyz
end subroutine psi_s_upd_xyz
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='sabgdxyz'
name='s_xyzw'
info = psb_success_
if (m.lt.0) then

@ -1568,7 +1568,7 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine zaxpbyv2
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@ -1582,7 +1582,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='zabgdxyz'
name='z_upd_xyz'
info = psb_success_
if (m.lt.0) then
@ -1791,7 +1791,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
end subroutine psi_zabgdxyz
end subroutine psi_z_upd_xyz
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='zabgdxyz'
name='z_xyzw'
info = psb_success_
if (m.lt.0) then

1
configure vendored

@ -7549,6 +7549,7 @@ fi
# Custom test : do we have a module or include for MPI Fortran interface?
if test x"$pac_cv_serial_mpi" == x"yes" ; then
FDEFINES="$psblas_cv_define_prepend-DSERIAL_MPI $psblas_cv_define_prepend-DMPI_MOD $FDEFINES";
CDEFINES="-DSERIAL_MPI $CDEFINES"
else
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking MPI Fortran 2008 interface" >&5
printf %s "checking MPI Fortran 2008 interface... " >&6; }

@ -518,6 +518,7 @@ fi
# Custom test : do we have a module or include for MPI Fortran interface?
if test x"$pac_cv_serial_mpi" == x"yes" ; then
FDEFINES="$psblas_cv_define_prepend-DSERIAL_MPI $psblas_cv_define_prepend-DMPI_MOD $FDEFINES";
CDEFINES="-DSERIAL_MPI $CDEFINES"
else
PAC_FORTRAN_CHECK_HAVE_MPI_MOD_F08()
if test x"$pac_cv_mpi_f08" == x"yes" ; then

@ -255,7 +255,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe
return(i);
}
int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta,
int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta,
cuFloatComplex gamma, cuFloatComplex delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
{ int j=0, i=0;
@ -268,7 +268,7 @@ int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
spgpuCabgdxyz(handle,n, alpha,beta,gamma,delta,
spgpuCupd_xyz(handle,n, alpha,beta,gamma,delta,
(cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,(cuFloatComplex *) devVecZ->v_);
return(i);
}

@ -69,7 +69,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta,
int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta,
cuFloatComplex gamma, cuFloatComplex delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b,

@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
return(i);
}
int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta,
int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
{ int j=0, i=0;
int pitch = 0;
@ -253,7 +253,7 @@ int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, do
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta,
spgpuDupd_xyz(handle,n, alpha,beta,gamma,delta,
(double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_);
return(i);
}

@ -67,7 +67,7 @@ int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA);
int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY);
int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta,
int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f,
void* devMultiVecX, void* devMultiVecY,

@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod
procedure, pass(x) :: dot_a => c_cuda_dot_a
procedure, pass(y) :: axpby_v => c_cuda_axpby_v
procedure, pass(y) :: axpby_a => c_cuda_axpby_a
procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz
procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz
procedure, pass(y) :: mlt_v => c_cuda_mlt_v
procedure, pass(y) :: mlt_a => c_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2
@ -912,7 +912,7 @@ contains
end subroutine c_cuda_axpby_v
subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -946,7 +946,7 @@ contains
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
@ -972,7 +972,7 @@ contains
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine c_cuda_abgdxyz
end subroutine c_cuda_upd_xyz
subroutine c_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod

@ -313,16 +313,16 @@ module psb_c_vectordev_mod
end function axpbyMultiVecDeviceFloatComplex
end interface
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceFloatComplex(n,alpha,beta,gamma,delta,deviceVecX,&
interface upd_xyzMultiVecDevice
function upd_xyzMultiVecDeviceFloatComplex(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='abgdxyzMultiVecDeviceFloatComplex')
& result(res) bind(c,name='upd_xyzMultiVecDeviceFloatComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
complex(c_float_complex), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function abgdxyzMultiVecDeviceFloatComplex
end function upd_xyzMultiVecDeviceFloatComplex
end interface
interface xyzwMultiVecDevice

@ -90,7 +90,7 @@ module psb_d_cuda_vect_mod
procedure, pass(x) :: dot_a => d_cuda_dot_a
procedure, pass(y) :: axpby_v => d_cuda_axpby_v
procedure, pass(y) :: axpby_a => d_cuda_axpby_a
procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz
procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz
procedure, pass(y) :: mlt_v => d_cuda_mlt_v
procedure, pass(y) :: mlt_a => d_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2
@ -912,7 +912,7 @@ contains
end subroutine d_cuda_axpby_v
subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -946,7 +946,7 @@ contains
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
@ -972,7 +972,7 @@ contains
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine d_cuda_abgdxyz
end subroutine d_cuda_upd_xyz
subroutine d_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod

@ -313,16 +313,16 @@ module psb_d_vectordev_mod
end function axpbyMultiVecDeviceDouble
end interface
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceDouble(n,alpha,beta,gamma,delta,deviceVecX,&
interface upd_xyzMultiVecDevice
function upd_xyzMultiVecDeviceDouble(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='abgdxyzMultiVecDeviceDouble')
& result(res) bind(c,name='upd_xyzMultiVecDeviceDouble')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
real(c_double), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function abgdxyzMultiVecDeviceDouble
end function upd_xyzMultiVecDeviceDouble
end interface
interface xyzwMultiVecDevice

@ -90,7 +90,7 @@ module psb_s_cuda_vect_mod
procedure, pass(x) :: dot_a => s_cuda_dot_a
procedure, pass(y) :: axpby_v => s_cuda_axpby_v
procedure, pass(y) :: axpby_a => s_cuda_axpby_a
procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz
procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz
procedure, pass(y) :: mlt_v => s_cuda_mlt_v
procedure, pass(y) :: mlt_a => s_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2
@ -912,7 +912,7 @@ contains
end subroutine s_cuda_axpby_v
subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -946,7 +946,7 @@ contains
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
@ -972,7 +972,7 @@ contains
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine s_cuda_abgdxyz
end subroutine s_cuda_upd_xyz
subroutine s_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod

@ -313,16 +313,16 @@ module psb_s_vectordev_mod
end function axpbyMultiVecDeviceFloat
end interface
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceFloat(n,alpha,beta,gamma,delta,deviceVecX,&
interface upd_xyzMultiVecDevice
function upd_xyzMultiVecDeviceFloat(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='abgdxyzMultiVecDeviceFloat')
& result(res) bind(c,name='upd_xyzMultiVecDeviceFloat')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
real(c_float), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function abgdxyzMultiVecDeviceFloat
end function upd_xyzMultiVecDeviceFloat
end interface
interface xyzwMultiVecDevice

@ -90,7 +90,7 @@ module psb_z_cuda_vect_mod
procedure, pass(x) :: dot_a => z_cuda_dot_a
procedure, pass(y) :: axpby_v => z_cuda_axpby_v
procedure, pass(y) :: axpby_a => z_cuda_axpby_a
procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz
procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz
procedure, pass(y) :: mlt_v => z_cuda_mlt_v
procedure, pass(y) :: mlt_a => z_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2
@ -912,7 +912,7 @@ contains
end subroutine z_cuda_axpby_v
subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@ -946,7 +946,7 @@ contains
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
@ -972,7 +972,7 @@ contains
call z%axpby(m,gamma,y,delta,info)
end if
end subroutine z_cuda_abgdxyz
end subroutine z_cuda_upd_xyz
subroutine z_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psi_serial_mod

@ -313,16 +313,16 @@ module psb_z_vectordev_mod
end function axpbyMultiVecDeviceDoubleComplex
end interface
interface abgdxyzMultiVecDevice
function abgdxyzMultiVecDeviceDoubleComplex(n,alpha,beta,gamma,delta,deviceVecX,&
interface upd_xyzMultiVecDevice
function upd_xyzMultiVecDeviceDoubleComplex(n,alpha,beta,gamma,delta,deviceVecX,&
& deviceVecY,deviceVecZ) &
& result(res) bind(c,name='abgdxyzMultiVecDeviceDoubleComplex')
& result(res) bind(c,name='upd_xyzMultiVecDeviceDoubleComplex')
use iso_c_binding
integer(c_int) :: res
integer(c_int), value :: n
complex(c_double_complex), value :: alpha, beta,gamma,delta
type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ
end function abgdxyzMultiVecDeviceDoubleComplex
end function upd_xyzMultiVecDeviceDoubleComplex
end interface
interface xyzwMultiVecDevice

@ -11,15 +11,15 @@ LIBNAME=$(UP)/libspgpu.a
CINCLUDES=-I$(INCDIR)
OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \
cnrm2.o cscal.o cscat.o csetscal.o cabgdxyz.o\
dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o dabgdxyz.o\
cnrm2.o cscal.o cscat.o csetscal.o cupd_xyz.o\
dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o dupd_xyz.o\
dia_cspmv.o dia_dspmv.o dia_sspmv.o dia_zspmv.o dnrm2.o \
dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o \
ell_dcsput.o ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \
hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \
hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \
saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o sabgdxyz.o\
zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zabgdxyz.o \
saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o supd_xyz.o\
zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zupd_xyz.o \
sxyzw.o cxyzw.o dxyzw.o zxyzw.o
objs: $(OBJS)

@ -31,7 +31,7 @@ extern "C"
#define BLOCK_SIZE 512
__global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex alpha, cuFloatComplex beta,
__global__ void spgpuCupd_xyz_krn(int n, cuFloatComplex alpha, cuFloatComplex beta,
cuFloatComplex gamma, cuFloatComplex delta,
cuFloatComplex * x, cuFloatComplex *y, cuFloatComplex *z)
{
@ -55,7 +55,7 @@ __global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex alpha, cuFloatComplex
}
void spgpuCabgdxyz(spgpuHandle_t handle,
void spgpuCupd_xyz(spgpuHandle_t handle,
int n,
cuFloatComplex alpha,
cuFloatComplex beta,
@ -74,7 +74,7 @@ void spgpuCabgdxyz(spgpuHandle_t handle,
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuCabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
spgpuCupd_xyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
x, y, z);
}

@ -31,7 +31,7 @@ extern "C"
#define BLOCK_SIZE 512
__global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma, double delta,
__global__ void spgpuDupd_xyz_krn(int n, double alpha, double beta, double gamma, double delta,
double* x, double *y, double *z)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
@ -54,7 +54,7 @@ __global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma
}
void spgpuDabgdxyz(spgpuHandle_t handle,
void spgpuDupd_xyz(spgpuHandle_t handle,
int n,
double alpha,
double beta,
@ -73,7 +73,7 @@ void spgpuDabgdxyz(spgpuHandle_t handle,
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuDabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
spgpuDupd_xyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
x, y, z);
}

@ -31,7 +31,7 @@ extern "C"
#define BLOCK_SIZE 512
__global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, float delta,
__global__ void spgpuSupd_xyz_krn(int n, float alpha, float beta, float gamma, float delta,
float* x, float *y, float *z)
{
int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
@ -54,7 +54,7 @@ __global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, f
}
void spgpuSabgdxyz(spgpuHandle_t handle,
void spgpuSupd_xyz(spgpuHandle_t handle,
int n,
float alpha,
float beta,
@ -73,7 +73,7 @@ void spgpuSabgdxyz(spgpuHandle_t handle,
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuSabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
spgpuSupd_xyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
x, y, z);
}

@ -31,7 +31,7 @@ extern "C"
#define BLOCK_SIZE 512
__global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta,
__global__ void spgpuZupd_xyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta,
cuDoubleComplex gamma, cuDoubleComplex delta,
cuDoubleComplex * x, cuDoubleComplex *y, cuDoubleComplex *z)
{
@ -55,7 +55,7 @@ __global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex
}
void spgpuZabgdxyz(spgpuHandle_t handle,
void spgpuZupd_xyz(spgpuHandle_t handle,
int n,
cuDoubleComplex alpha,
cuDoubleComplex beta,
@ -74,7 +74,7 @@ void spgpuZabgdxyz(spgpuHandle_t handle,
num_blocks = num_blocks_mp*num_mp;
dim3 grid(num_blocks);
spgpuZabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
spgpuZupd_xyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
x, y, z);
}

@ -182,7 +182,7 @@ void spgpuSaxpby(spgpuHandle_t handle,
__device float* x);
void spgpuSabgdxyz(spgpuHandle_t handle,
void spgpuSupd_xyz(spgpuHandle_t handle,
int n,
float alpha,
float beta,
@ -487,7 +487,7 @@ void spgpuDaxpby(spgpuHandle_t handle,
__device double* x);
void spgpuDabgdxyz(spgpuHandle_t handle,
void spgpuDupd_xyz(spgpuHandle_t handle,
int n,
double alpha,
double beta,
@ -789,7 +789,7 @@ void spgpuCaxpby(spgpuHandle_t handle,
__device cuFloatComplex* x);
void spgpuCabgdxyz(spgpuHandle_t handle,
void spgpuCupd_xyz(spgpuHandle_t handle,
int n,
cuFloatComplex alpha,
cuFloatComplex beta,
@ -1092,7 +1092,7 @@ void spgpuZaxpby(spgpuHandle_t handle,
__device cuDoubleComplex* x);
void spgpuZabgdxyz(spgpuHandle_t handle,
void spgpuZupd_xyz(spgpuHandle_t handle,
int n,
cuDoubleComplex alpha,
cuDoubleComplex beta,

@ -241,7 +241,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX,
return(i);
}
int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta,
int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
{ int j=0, i=0;
int pitch = 0;
@ -253,7 +253,7 @@ int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
spgpuSabgdxyz(handle,n, alpha,beta,gamma,delta,
spgpuSupd_xyz(handle,n, alpha,beta,gamma,delta,
(float*)devVecX->v_,(float*) devVecY->v_,(float*) devVecZ->v_);
return(i);
}

@ -67,7 +67,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA);
int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY);
int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta,
int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f,
void* devMultiVecX, void* devMultiVecY,

@ -234,7 +234,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti
return(i);
}
int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha,
int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha,
cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
{ int j=0, i=0;
@ -247,7 +247,7 @@ int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha,
if ((n > devVecY->size_) || (n>devVecX->size_ ))
return SPGPU_UNSUPPORTED;
spgpuZabgdxyz(handle,n, alpha,beta,gamma,delta,
spgpuZupd_xyz(handle,n, alpha,beta,gamma,delta,
(cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_);
return(i);
}

@ -77,7 +77,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX,
cuDoubleComplex beta, void* devVecY);
int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha,
int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha,
cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b,

@ -129,8 +129,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
logical :: do_cond
character(len=20) :: name
character(len=*), parameter :: methdname='CG'
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: cg_vect=-1, cg_mv=-1, cg_prec=-1
info = psb_success_
name = 'psb_dcg'
@ -151,12 +149,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
call psb_errpush(info,name)
goto 9999
endif
if ((do_timings).and.(cg_vect==-1)) &
& cg_vect = psb_get_timer_idx("CG: vector ops ")
if ((do_timings).and.(cg_mv==-1)) &
& cg_mv = psb_get_timer_idx("CG: MV product")
if ((do_timings).and.(cg_prec==-1)) &
& cg_prec = psb_get_timer_idx("CG: preconditioner")
mglob = desc_a%get_global_rows()
@ -227,21 +219,17 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
! =
! = r0 = b-Ax0
! =
if (do_timings) call psb_tic(cg_vect)
if (itx>= itmax_) exit restart
it = 0
call psb_geaxpby(done,b,dzero,r,desc_a,info)
if (do_timings) call psb_toc(cg_vect)
if (do_timings) call psb_tic(cg_mv)
if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux)
if (info /= psb_success_) then
info=psb_err_from_subroutine_non_
call psb_errpush(info,name)
goto 9999
end if
if (do_timings) call psb_toc(cg_mv)
if (do_timings) call psb_tic(cg_vect)
rho = dzero
call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
@ -249,18 +237,13 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
call psb_errpush(psb_err_from_subroutine_non_,name)
goto 9999
End If
if (do_timings) call psb_toc(cg_vect)
iteration: do
it = it + 1
itx = itx + 1
if (do_timings) call psb_tic(cg_prec)
call prec%apply(r,z,desc_a,info,work=aux)
if (do_timings) call psb_toc(cg_prec)
if (do_timings) call psb_tic(cg_vect)
call prec%apply(r,z,desc_a,info,work=aux)
rho_old = rho
rho = psb_gedot(r,z,desc_a,info)
@ -271,18 +254,13 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
if (debug_level >= psb_debug_ext_)&
& write(debug_unit,*) me,' ',trim(name),&
& ': CG Iteration breakdown rho'
if (do_timings) call psb_toc(cg_vect)
exit iteration
endif
beta = rho/rho_old
call psb_geaxpby(done,z,beta,p,desc_a,info)
end if
if (do_timings) call psb_toc(cg_vect)
if (do_timings) call psb_tic(cg_mv)
call psb_spmm(done,a,p,dzero,q,desc_a,info,work=aux)
if (do_timings) call psb_toc(cg_mv)
if (do_timings) call psb_tic(cg_vect)
sigma = psb_gedot(p,q,desc_a,info)
if (sigma == dzero) then
if (debug_level >= psb_debug_ext_)&
@ -315,7 +293,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
end do iteration
end do restart
if (do_timings) call psb_toc(cg_vect)
if (do_cond) then
if (me == psb_root_) then
#if defined(HAVE_LAPACK)

@ -16,48 +16,26 @@ LDLIBS=$(PSBGPULDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES)
ZTOBJS=z_file_spmv.o data_input.o
CTOBJS=c_file_spmv.o data_input.o
DTOBJS=d_file_spmv.o data_input.o
STOBJS=s_file_spmv.o data_input.o
DPGOBJS=dpdegenmv.o
SPGOBJS=spdegenmv.o
DPGOBJS=dpdegenmv.o data_input.o
SPGOBJS=spdegenmv.o data_input.o
EXEDIR=./runs
all: dir pgen file
all: dir pgen
pgen: dpdegenmv spdegenmv
file: s_file_spmv c_file_spmv d_file_spmv z_file_spmv
dpdegenmv spdegenmv s_file_spmv c_file_spmv d_file_spmv z_file_spmv: dir
dpdegenmv spdegenmv: dir
dir:
(if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi)
dpdegenmv: $(DPGOBJS)
dpdegenmv.o spdegenmv.o: data_input.o
dpdegenmv: $(DPGOBJS)
$(FLINK) $(LOPT) $(DPGOBJS) -fopenmp -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS)
/bin/mv dpdegenmv $(EXEDIR)
spdegenmv: $(SPGOBJS)
$(FLINK) $(LOPT) $(SPGOBJS) -o spdegenmv $(PSBLAS_LIB) $(LDLIBS)
/bin/mv spdegenmv $(EXEDIR)
z_file_spmv: $(ZTOBJS)
$(FLINK) $(LOPT) $(ZTOBJS) -o z_file_spmv $(PSBLAS_LIB) $(LDLIBS)
/bin/mv z_file_spmv $(EXEDIR)
c_file_spmv: $(CTOBJS)
$(FLINK) $(LOPT) $(CTOBJS) -o c_file_spmv $(PSBLAS_LIB) $(LDLIBS)
/bin/mv c_file_spmv $(EXEDIR)
d_file_spmv: $(DTOBJS)
$(FLINK) $(LOPT) $(DTOBJS) -o d_file_spmv $(PSBLAS_LIB) $(LDLIBS)
/bin/mv d_file_spmv $(EXEDIR)
s_file_spmv: $(STOBJS)
$(FLINK) $(LOPT) $(STOBJS) -o s_file_spmv $(PSBLAS_LIB) $(LDLIBS)
/bin/cp s_file_spmv $(EXEDIR)
d_file_spmv.o s_file_spmv.o z_file_spmv.o c_file_spmv.o: data_input.o
clean:
/bin/rm -f $(DTOBJS) $(STOBJS) $(DPGOBJS) $(SPGOBJS) $(ZTOBJS) $(CTOBJS) \
$(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv \
$(EXEDIR)/d_file_spmv $(EXEDIR)/s_file_spmv \
$(EXEDIR)/z_file_spmv $(EXEDIR)/c_file_spmv
$(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv *mod
lib:
(cd ../../; make library)

@ -1,491 +0,0 @@
!
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
program c_file_spmv
use psb_base_mod
use psb_util_mod
use psb_ext_mod
#ifdef HAVE_GPU
use psb_gpu_mod
#endif
use data_input
implicit none
! input parameters
character(len=200) :: mtrx_file
! sparse matrices
type(psb_cspmat_type) :: a, aux_a, agpu
! dense matrices
complex(psb_spk_), allocatable, target :: aux_b(:,:), d(:)
complex(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:)
complex(psb_spk_), pointer :: b_col_glob(:)
type(psb_c_vect_type) :: b_col, x_col, r_col
type(psb_c_vect_type) :: xg, bg, xv, bv
#ifdef HAVE_GPU
type(psb_c_vect_gpu) :: vmold
#endif
complex(psb_spk_), allocatable :: xc1(:),xc2(:)
! communications data structure
type(psb_desc_type):: desc_a
type(psb_ctxt_type) :: ctxt
integer :: iam, np
integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes
real(psb_spk_) :: damatsize, dgmatsize
complex(psb_spk_) :: err, eps
character(len=5) :: acfmt, agfmt
character(len=20) :: name
character(len=2) :: filefmt
integer, parameter :: iunit=12
integer, parameter :: times=2000
integer, parameter :: ntests=200, ngpu=50, ncnv=20
type(psb_c_coo_sparse_mat), target :: acoo
type(psb_c_csr_sparse_mat), target :: acsr
type(psb_c_ell_sparse_mat), target :: aell
type(psb_c_hll_sparse_mat), target :: ahll
#ifdef HAVE_GPU
type(psb_c_elg_sparse_mat), target :: aelg
type(psb_c_csrg_sparse_mat), target :: acsrg
type(psb_c_hybg_sparse_mat), target :: ahybg
type(psb_c_hlg_sparse_mat), target :: ahlg
#endif
class(psb_c_base_sparse_mat), pointer :: acmold, agmold
! other variables
integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg
integer :: internal, m,ii,nnzero
real(psb_dpk_) :: t0,t1, t2, tprec, flops
real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,&
& tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
integer :: nrhs, nrow, n_row, dim, nv, ne
integer, allocatable :: ivg(:), ipv(:)
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
#ifdef HAVE_GPU
call psb_gpu_init(ctxt)
#endif
if (iam < 0) then
! This should not happen, but just in case
call psb_exit(ctxt)
stop
endif
name='file_spmv'
if(psb_get_errstatus() /= 0) goto 9999
info=psb_success_
call psb_set_errverbosity(2)
if (iam == psb_root_) then
write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
write(*,*) 'This is the ',trim(name),' sample program'
end if
#ifdef HAVE_GPU
write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName())
#endif
if (iam == 0) then
write(*,*) 'Matrix? '
call read_data(mtrx_file,psb_inp_unit)
write(*,*) 'file format'
call read_data(filefmt,psb_inp_unit)
write(*,*) 'CPU format'
call read_data(acfmt,psb_inp_unit)
write(*,*) 'GPU format'
call read_data(agfmt,psb_inp_unit)
write(*,*) 'distribution '
call read_data(ipart,psb_inp_unit)
write(*,*) 'Read all data, going on'
end if
call psb_bcast(ctxt,mtrx_file)
call psb_bcast(ctxt,filefmt)
call psb_bcast(ctxt,acfmt)
call psb_bcast(ctxt,agfmt)
call psb_bcast(ctxt,ipart)
call psb_barrier(ctxt)
t0 = psb_wtime()
! read the input matrix to be processed and (possibly) the rhs
nrhs = 1
if (iam==psb_root_) then
select case(psb_toupper(filefmt))
case('MM')
! For Matrix Market we have an input file for the matrix
! and an (optional) second file for the RHS.
call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
case ('HB')
! For Harwell-Boeing we have a single file which may or may not
! contain an RHS.
call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file)
case default
info = -1
write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt
end select
if (info /= 0) then
write(psb_err_unit,*) 'Error while reading input matrix '
call psb_abort(ctxt)
end if
!
! Always get nnz from original matrix.
! Some formats add fill-in and do not keep track
! of how many were added. So if the original matrix
! contained some extra zeros, the count of entries
! is not recoverable exactly.
!
nrt = aux_a%get_nrows()
annz = aux_a%get_nzeros()
call psb_bcast(ctxt,annz)
call psb_bcast(ctxt,nrt)
write(psb_out_unit,'("Generating an rhs...")')
write(psb_out_unit,'(" ")')
call psb_realloc(nrt,1,aux_b,info)
if (info /= 0) then
call psb_errpush(4000,name)
goto 9999
endif
b_col_glob => aux_b(:,1)
do i=1, nrt
b_col_glob(i) = 1.d0
enddo
else
call psb_bcast(ctxt,annz)
call psb_bcast(ctxt,nrt)
end if
select case(psb_toupper(acfmt))
case('COO')
acmold => acoo
case('CSR')
acmold => acsr
case('ELL')
acmold => aell
case('HLL')
acmold => ahll
case default
write(*,*) 'Unknown format defaulting to CSR'
acmold => acsr
end select
#ifdef HAVE_GPU
select case(psb_toupper(agfmt))
case('ELG')
agmold => aelg
case('HLG')
agmold => ahlg
case('CSRG')
agmold => acsrg
case('HYBG')
agmold => ahybg
case default
write(*,*) 'Unknown format defaulting to HLG'
agmold => ahlg
end select
#endif
! switch over different partition types
if (ipart == 0) then
call psb_barrier(ctxt)
if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")')
allocate(ivg(nrt),ipv(np))
do i=1,nrt
call part_block(i,nrt,np,ipv,nv)
ivg(i) = ipv(1)
enddo
call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
else if (ipart == 2) then
if (iam==psb_root_) then
write(psb_out_unit,'("Partition type: graph")')
write(psb_out_unit,'(" ")')
! write(psb_err_unit,'("Build type: graph")')
call build_mtpart(aux_a,np)
endif
call psb_barrier(ctxt)
call distr_mtpart(psb_root_,ctxt)
call getv_mtpart(ivg)
call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
else
if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")')
call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block)
end if
call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_)
t2 = psb_wtime() - t0
call psb_amx(ctxt, t2)
if (iam==psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2
write(psb_out_unit,'(" ")')
end if
call a%cscnv(aux_a,info,mold=acoo)
tcnvcsr = 0
tcnvgpu = 0
nr = desc_a%get_local_rows()
nrg = desc_a%get_global_rows()
call psb_geall(x_col,desc_a,info)
do i=1, nr
call desc_a%l2g(i,ig,info)
call psb_geins(ione,(/ig/),(/(cone + (cone*ig)/nrg)/),x_col,desc_a,info)
end do
call psb_geasb(x_col,desc_a,info)
do j=1, ncnv
call aux_a%cscnv(a,info,mold=acoo)
call psb_barrier(ctxt)
t1 = psb_wtime()
call a%cscnv(info,mold=acmold)
t2 = psb_Wtime() -t1
call psb_amx(ctxt,t2)
tcnvcsr = tcnvcsr + t2
if (j==1) tcnvc1 = t2
xc1 = x_col%get_vect()
call xv%bld(xc1)
call psb_geasb(bv,desc_a,info,scratch=.true.)
#ifdef HAVE_GPU
call aux_a%cscnv(agpu,info,mold=acoo)
call xg%bld(xc1,mold=vmold)
call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
call psb_barrier(ctxt)
t1 = psb_wtime()
call agpu%cscnv(info,mold=agmold)
call psb_gpu_DeviceSync()
t2 = psb_Wtime() -t1
call psb_amx(ctxt,t2)
if (j==1) tcnvg1 = t2
tcnvgpu = tcnvgpu + t2
#endif
end do
call psb_barrier(ctxt)
t1 = psb_wtime()
do i=1,ntests
call psb_spmm(cone,a,xv,czero,bv,desc_a,info)
end do
call psb_barrier(ctxt)
t2 = psb_wtime() - t1
call psb_amx(ctxt,t2)
#ifdef HAVE_GPU
! FIXME: cache flush needed here
call psb_barrier(ctxt)
tt1 = psb_wtime()
do i=1,ntests
call psb_spmm(cone,agpu,xv,czero,bg,desc_a,info)
if ((info /= 0).or.(psb_get_errstatus()/=0)) then
write(0,*) 'From 1 spmm',info,i,ntests
call psb_error()
stop
end if
end do
call psb_gpu_DeviceSync()
call psb_barrier(ctxt)
tt2 = psb_wtime() - tt1
call psb_amx(ctxt,tt2)
xc1 = bv%get_vect()
xc2 = bg%get_vect()
nr = desc_a%get_local_rows()
eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
call psb_amx(ctxt,eps)
if (iam==0) write(*,*) 'Max diff on xGPU',eps
call xg%sync()
! FIXME: cache flush needed here
call psb_barrier(ctxt)
gt1 = psb_wtime()
do i=1,ntests*ngpu
call psb_spmm(cone,agpu,xg,czero,bg,desc_a,info)
if ((info /= 0).or.(psb_get_errstatus()/=0)) then
write(0,*) 'From 2 spmm',info,i,ntests
call psb_error()
stop
end if
end do
! For timing purposes we need to make sure all threads
! in the device are done.
call psb_gpu_DeviceSync()
call psb_barrier(ctxt)
gt2 = psb_wtime() - gt1
call psb_amx(ctxt,gt2)
call bg%sync()
xc1 = bv%get_vect()
xc2 = bg%get_vect()
call psb_geaxpby(-cone,bg,+cone,bv,desc_a,info)
eps = psb_geamax(bv,desc_a,info)
call psb_amx(ctxt,t2)
nr = desc_a%get_local_rows()
eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
call psb_amx(ctxt,eps)
if (iam==0) write(*,*) 'Max diff on GPU',eps
#endif
amatsize = a%sizeof()
agmatsize = agpu%sizeof()
damatsize = amatsize
damatsize = damatsize/(1024*1024)
dgmatsize = agmatsize
dgmatsize = dgmatsize/(1024*1024)
descsize = psb_sizeof(desc_a)
call psb_sum(ctxt,damatsize)
call psb_sum(ctxt,dgmatsize)
call psb_sum(ctxt,descsize)
if (iam == psb_root_) then
write(psb_out_unit,'("Matrix: ",a)') mtrx_file
write(psb_out_unit,&
&'("Test on : ",i20," processors")') np
write(psb_out_unit,&
&'("Size of matrix : ",i20," ")') nrt
write(psb_out_unit,&
&'("Number of nonzeros : ",i20," ")') annz
write(psb_out_unit,&
&'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize
write(psb_out_unit,&
&'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize
write(psb_out_unit,&
&'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize
write(psb_out_unit,&
&'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize
flops = ntests*(2.d0*annz)
tflops = flops
gflops = flops * ngpu
write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt()
#ifdef HAVE_GPU
write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
& tcnvc1
write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
& tcnvcsr
write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
& tcnvcsr/ncnv
write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
& tcnvg1
write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
& tcnvgpu
write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
& tcnvgpu/ncnv
#endif
write(psb_out_unit,&
& '("Number of flops (",i0," prod) : ",F20.0," ")') &
& ntests,flops
flops = flops / (t2)
tflops = tflops / (tt2)
gflops = gflops / (gt2)
write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')&
& ntests,t2
write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')&
& t2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')&
& flops/1.d6
#ifdef HAVE_GPU
write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')&
& ntests, tt2
write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')&
& tt2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')&
& tflops/1.d6
write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')&
& ngpu*ntests,gt2
write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')&
& gt2*1.d3/(1.d0*ntests*ngpu)
write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')&
& gflops/1.d6
#endif
!
! This computation assumes the data movement associated with CSR:
! it is minimal in terms of coefficients. Other formats may either move
! more data (padding etc.) or less data (if they can save on the indices).
!
nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+&
& annz*(2*psb_sizeof_dp + psb_sizeof_ip)
bdwdth = ntests*nbytes/(t2*1.d6)
write(psb_out_unit,*)
write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth
#ifdef HAVE_GPU
bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth
#endif
write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
end if
call psb_gefree(b_col, desc_a,info)
call psb_gefree(x_col, desc_a,info)
call psb_gefree(xv, desc_a,info)
call psb_gefree(bv, desc_a,info)
call psb_spfree(a, desc_a,info)
#ifdef HAVE_GPU
call psb_gefree(xg, desc_a,info)
call psb_gefree(bg, desc_a,info)
call psb_spfree(agpu,desc_a,info)
call psb_gpu_exit()
#endif
call psb_cdfree(desc_a,info)
call psb_exit(ctxt)
stop
9999 continue
call psb_error(ctxt)
end program c_file_spmv

@ -1,496 +0,0 @@
!
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
program d_file_spmv
use psb_base_mod
use psb_util_mod
use psb_ext_mod
#ifdef HAVE_GPU
use psb_gpu_mod
#endif
use data_input
implicit none
! input parameters
character(len=200) :: mtrx_file
! sparse matrices
type(psb_dspmat_type) :: a, aux_a, agpu
! dense matrices
real(psb_dpk_), allocatable, target :: aux_b(:,:), d(:)
real(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:)
real(psb_dpk_), pointer :: b_col_glob(:)
type(psb_d_vect_type) :: b_col, x_col, r_col
type(psb_d_vect_type) :: xg, bg, xv, bv
#ifdef HAVE_GPU
type(psb_d_vect_gpu) :: vmold
#endif
real(psb_dpk_), allocatable :: xc1(:),xc2(:)
! communications data structure
type(psb_desc_type):: desc_a
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np
integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes
real(psb_dpk_) :: err, eps, damatsize, dgmatsize
character(len=5) :: acfmt, agfmt
character(len=20) :: name
character(len=2) :: filefmt
integer, parameter :: iunit=12
integer, parameter :: times=2000
integer, parameter :: ntests=200, ngpu=50, ncnv=20
type(psb_d_coo_sparse_mat), target :: acoo
type(psb_d_csr_sparse_mat), target :: acsr
type(psb_d_ell_sparse_mat), target :: aell
type(psb_d_hll_sparse_mat), target :: ahll
type(psb_d_hdia_sparse_mat), target :: ahdia
#ifdef HAVE_GPU
type(psb_d_elg_sparse_mat), target :: aelg
type(psb_d_csrg_sparse_mat), target :: acsrg
type(psb_d_hybg_sparse_mat), target :: ahybg
type(psb_d_hlg_sparse_mat), target :: ahlg
type(psb_d_hdiag_sparse_mat), target :: ahdiag
#endif
class(psb_d_base_sparse_mat), pointer :: acmold, agmold
! other variables
integer(psb_lpk_) :: i,j,nrt, ns, nr, ig, nrg
integer(psb_ipk_) :: internal, m,ii,nnzero,info, ipart
real(psb_dpk_) :: t0,t1, t2, tprec, flops
real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,&
& tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
integer :: nrhs, nrow, n_row, dim, nv, ne
integer, allocatable :: ivg(:), ipv(:)
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
#ifdef HAVE_GPU
call psb_gpu_init(ctxt)
#endif
if (iam < 0) then
! This should not happen, but just in case
call psb_exit(ctxt)
stop
endif
name='file_spmv'
if(psb_get_errstatus() /= 0) goto 9999
info=psb_success_
call psb_set_errverbosity(2)
if (iam == psb_root_) then
write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
write(*,*) 'This is the ',trim(name),' sample program'
end if
#ifdef HAVE_GPU
write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName())
#endif
if (iam == 0) then
write(*,*) 'Matrix? '
call read_data(mtrx_file,psb_inp_unit)
write(*,*) 'file format'
call read_data(filefmt,psb_inp_unit)
write(*,*) 'CPU format'
call read_data(acfmt,psb_inp_unit)
write(*,*) 'GPU format'
call read_data(agfmt,psb_inp_unit)
write(*,*) 'distribution '
call read_data(ipart,psb_inp_unit)
write(*,*) 'Read all data, going on'
end if
call psb_bcast(ctxt,mtrx_file)
call psb_bcast(ctxt,filefmt)
call psb_bcast(ctxt,acfmt)
call psb_bcast(ctxt,agfmt)
call psb_bcast(ctxt,ipart)
call psb_barrier(ctxt)
t0 = psb_wtime()
! read the input matrix to be processed and (possibly) the rhs
nrhs = 1
if (iam==psb_root_) then
select case(psb_toupper(filefmt))
case('MM')
! For Matrix Market we have an input file for the matrix
! and an (optional) second file for the RHS.
call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
case ('HB')
! For Harwell-Boeing we have a single file which may or may not
! contain an RHS.
call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file)
case default
info = -1
write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt
end select
if (info /= 0) then
write(psb_err_unit,*) 'Error while reading input matrix '
call psb_abort(ctxt)
end if
!
! Always get nnz from original matrix.
! Some formats add fill-in and do not keep track
! of how many were added. So if the original matrix
! contained some extra zeros, the count of entries
! is not recoverable exactly.
!
nrt = aux_a%get_nrows()
annz = aux_a%get_nzeros()
call psb_bcast(ctxt,annz)
call psb_bcast(ctxt,nrt)
write(psb_out_unit,'("Generating an rhs...")')
write(psb_out_unit,'(" ")')
call psb_realloc(nrt,1,aux_b,info)
if (info /= 0) then
call psb_errpush(4000,name)
goto 9999
endif
b_col_glob => aux_b(:,1)
do i=1, nrt
b_col_glob(i) = 1.d0
enddo
else
call psb_bcast(ctxt,annz)
call psb_bcast(ctxt,nrt)
end if
select case(psb_toupper(acfmt))
case('COO')
acmold => acoo
case('CSR')
acmold => acsr
case('ELL')
acmold => aell
case('HLL')
acmold => ahll
case('HDIA')
acmold => ahdia
case default
write(*,*) 'Unknown format defaulting to CSR'
acmold => acsr
end select
#ifdef HAVE_GPU
select case(psb_toupper(agfmt))
case('ELG')
agmold => aelg
case('HLG')
agmold => ahlg
case('CSRG')
agmold => acsrg
case('HYBG')
agmold => ahybg
case('HDIAG')
agmold => ahdiag
case default
write(*,*) 'Unknown format defaulting to HLG'
agmold => ahlg
end select
#endif
! switch over different partition types
if (ipart == 0) then
call psb_barrier(ctxt)
if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")')
allocate(ivg(nrt),ipv(np))
do i=1,nrt
call part_block(i,nrt,np,ipv,nv)
ivg(i) = ipv(1)
enddo
call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg)
else if (ipart == 2) then
if (iam==psb_root_) then
write(psb_out_unit,'("Partition type: graph")')
write(psb_out_unit,'(" ")')
! write(psb_err_unit,'("Build type: graph")')
call build_mtpart(aux_a,np)
endif
call psb_barrier(ctxt)
call distr_mtpart(psb_root_,ctxt)
call getv_mtpart(ivg)
call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg)
else
if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")')
call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block)
end if
call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_)
t2 = psb_wtime() - t0
call psb_amx(ctxt, t2)
if (iam==psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2
write(psb_out_unit,'(" ")')
end if
call a%cscnv(aux_a,info,mold=acoo)
tcnvcsr = 0
tcnvgpu = 0
nr = desc_a%get_local_rows()
nrg = desc_a%get_global_rows()
call psb_geall(x_col,desc_a,info)
do i=1, nr
call desc_a%l2g(i,ig,info)
call psb_geins(ione,(/ig/),(/(done + (done*ig)/nrg)/),x_col,desc_a,info)
end do
call psb_geasb(x_col,desc_a,info)
do j=1, ncnv
call aux_a%cscnv(a,info,mold=acoo)
call psb_barrier(ctxt)
t1 = psb_wtime()
call a%cscnv(info,mold=acmold)
t2 = psb_Wtime() -t1
call psb_amx(ctxt,t2)
tcnvcsr = tcnvcsr + t2
if (j==1) tcnvc1 = t2
xc1 = x_col%get_vect()
call xv%bld(xc1)
call psb_geasb(bv,desc_a,info,scratch=.true.)
#ifdef HAVE_GPU
call aux_a%cscnv(agpu,info,mold=acoo)
call xg%bld(xc1,mold=vmold)
call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
call psb_barrier(ctxt)
t1 = psb_wtime()
call agpu%cscnv(info,mold=agmold)
call psb_gpu_DeviceSync()
t2 = psb_Wtime() -t1
call psb_amx(ctxt,t2)
if (j==1) tcnvg1 = t2
tcnvgpu = tcnvgpu + t2
#endif
end do
call psb_barrier(ctxt)
t1 = psb_wtime()
do i=1,ntests
call psb_spmm(done,a,xv,dzero,bv,desc_a,info)
end do
call psb_barrier(ctxt)
t2 = psb_wtime() - t1
call psb_amx(ctxt,t2)
#ifdef HAVE_GPU
! FIXME: cache flush needed here
call psb_barrier(ctxt)
tt1 = psb_wtime()
do i=1,ntests
call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info)
if ((info /= 0).or.(psb_get_errstatus()/=0)) then
write(0,*) 'From 1 spmm',info,i,ntests
call psb_error()
stop
end if
end do
call psb_gpu_DeviceSync()
call psb_barrier(ctxt)
tt2 = psb_wtime() - tt1
call psb_amx(ctxt,tt2)
xc1 = bv%get_vect()
xc2 = bg%get_vect()
nr = desc_a%get_local_rows()
eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
call psb_amx(ctxt,eps)
if (iam==0) write(*,*) 'Max diff on xGPU',eps
call xg%sync()
! FIXME: cache flush needed here
call psb_barrier(ctxt)
gt1 = psb_wtime()
do i=1,ntests*ngpu
call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
if ((info /= 0).or.(psb_get_errstatus()/=0)) then
write(0,*) 'From 2 spmm',info,i,ntests
call psb_error()
stop
end if
end do
! For timing purposes we need to make sure all threads
! in the device are done.
call psb_gpu_DeviceSync()
call psb_barrier(ctxt)
gt2 = psb_wtime() - gt1
call psb_amx(ctxt,gt2)
call bg%sync()
xc1 = bv%get_vect()
xc2 = bg%get_vect()
call psb_geaxpby(-done,bg,+done,bv,desc_a,info)
eps = psb_geamax(bv,desc_a,info)
call psb_amx(ctxt,t2)
nr = desc_a%get_local_rows()
eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
call psb_amx(ctxt,eps)
if (iam==0) write(*,*) 'Max diff on GPU',eps
#endif
amatsize = a%sizeof()
agmatsize = agpu%sizeof()
damatsize = amatsize
damatsize = damatsize/(1024*1024)
dgmatsize = agmatsize
dgmatsize = dgmatsize/(1024*1024)
descsize = psb_sizeof(desc_a)
call psb_sum(ctxt,damatsize)
call psb_sum(ctxt,dgmatsize)
call psb_sum(ctxt,descsize)
if (iam == psb_root_) then
write(psb_out_unit,'("Matrix: ",a)') mtrx_file
write(psb_out_unit,&
&'("Test on : ",i20," processors")') np
write(psb_out_unit,&
&'("Size of matrix : ",i20," ")') nrt
write(psb_out_unit,&
&'("Number of nonzeros : ",i20," ")') annz
write(psb_out_unit,&
&'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize
write(psb_out_unit,&
&'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize
write(psb_out_unit,&
&'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize
write(psb_out_unit,&
&'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize
flops = ntests*(2.d0*annz)
tflops = flops
gflops = flops * ngpu
write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt()
#ifdef HAVE_GPU
write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
& tcnvc1
write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
& tcnvcsr
write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
& tcnvcsr/ncnv
write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
& tcnvg1
write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
& tcnvgpu
write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
& tcnvgpu/ncnv
#endif
write(psb_out_unit,&
& '("Number of flops (",i0," prod) : ",F20.0," ")') &
& ntests,flops
flops = flops / (t2)
tflops = tflops / (tt2)
gflops = gflops / (gt2)
write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')&
& ntests,t2
write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')&
& t2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')&
& flops/1.d6
#ifdef HAVE_GPU
write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')&
& ntests, tt2
write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')&
& tt2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')&
& tflops/1.d6
write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')&
& ngpu*ntests,gt2
write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')&
& gt2*1.d3/(1.d0*ntests*ngpu)
write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')&
& gflops/1.d6
#endif
!
! This computation assumes the data movement associated with CSR:
! it is minimal in terms of coefficients. Other formats may either move
! more data (padding etc.) or less data (if they can save on the indices).
!
nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+&
& annz*(psb_sizeof_dp + psb_sizeof_ip)
bdwdth = ntests*nbytes/(t2*1.d6)
write(psb_out_unit,*)
write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth
#ifdef HAVE_GPU
bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth
#endif
write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
end if
call psb_gefree(b_col, desc_a,info)
call psb_gefree(x_col, desc_a,info)
call psb_gefree(xv, desc_a,info)
call psb_gefree(bv, desc_a,info)
call psb_spfree(a, desc_a,info)
#ifdef HAVE_GPU
call psb_gefree(xg, desc_a,info)
call psb_gefree(bg, desc_a,info)
call psb_spfree(agpu,desc_a,info)
call psb_gpu_exit()
#endif
call psb_cdfree(desc_a,info)
call psb_exit(ctxt)
stop
9999 continue
call psb_error(ctxt)
end program d_file_spmv

@ -1,496 +0,0 @@
!
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
program s_file_spmv
use psb_base_mod
use psb_util_mod
use psb_ext_mod
#ifdef HAVE_GPU
use psb_gpu_mod
#endif
use data_input
implicit none
! input parameters
character(len=200) :: mtrx_file
! sparse matrices
type(psb_sspmat_type) :: a, aux_a, agpu
! dense matrices
real(psb_spk_), allocatable, target :: aux_b(:,:), d(:)
real(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:)
real(psb_spk_), pointer :: b_col_glob(:)
type(psb_s_vect_type) :: b_col, x_col, r_col
type(psb_s_vect_type) :: xg, bg, xv, bv
#ifdef HAVE_GPU
type(psb_s_vect_gpu) :: vmold
#endif
real(psb_spk_), allocatable :: xc1(:),xc2(:)
! communications data structure
type(psb_desc_type):: desc_a
type(psb_ctxt_type) :: ctxt
integer :: iam, np
integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes
real(psb_spk_) :: err, eps, samatsize, sgmatsize
character(len=5) :: acfmt, agfmt
character(len=20) :: name
character(len=2) :: filefmt
integer, parameter :: iunit=12
integer, parameter :: times=2000
integer, parameter :: ntests=200, ngpu=50, ncnv=20
type(psb_s_coo_sparse_mat), target :: acoo
type(psb_s_csr_sparse_mat), target :: acsr
type(psb_s_ell_sparse_mat), target :: aell
type(psb_s_hll_sparse_mat), target :: ahll
type(psb_s_hdia_sparse_mat), target :: ahdia
#ifdef HAVE_GPU
type(psb_s_elg_sparse_mat), target :: aelg
type(psb_s_csrg_sparse_mat), target :: acsrg
type(psb_s_hybg_sparse_mat), target :: ahybg
type(psb_s_hlg_sparse_mat), target :: ahlg
type(psb_s_hdiag_sparse_mat), target :: ahdiag
#endif
class(psb_s_base_sparse_mat), pointer :: acmold, agmold
! other variables
integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg
integer :: internal, m,ii,nnzero
real(psb_dpk_) :: t0,t1, t2, tprec, flops
real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,&
& tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
integer :: nrhs, nrow, n_row, dim, nv, ne
integer, allocatable :: ivg(:), ipv(:)
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
#ifdef HAVE_GPU
call psb_gpu_init(ctxt)
#endif
if (iam < 0) then
! This should not happen, but just in case
call psb_exit(ctxt)
stop
endif
name='file_spmv'
if(psb_get_errstatus() /= 0) goto 9999
info=psb_success_
call psb_set_errverbosity(2)
if (iam == psb_root_) then
write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
write(*,*) 'This is the ',trim(name),' sample program'
end if
#ifdef HAVE_GPU
write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName())
#endif
if (iam == 0) then
write(*,*) 'Matrix? '
call read_data(mtrx_file,psb_inp_unit)
write(*,*) 'file format'
call read_data(filefmt,psb_inp_unit)
write(*,*) 'CPU format'
call read_data(acfmt,psb_inp_unit)
write(*,*) 'GPU format'
call read_data(agfmt,psb_inp_unit)
write(*,*) 'distribution '
call read_data(ipart,psb_inp_unit)
write(*,*) 'Read all data, going on'
end if
call psb_bcast(ctxt,mtrx_file)
call psb_bcast(ctxt,filefmt)
call psb_bcast(ctxt,acfmt)
call psb_bcast(ctxt,agfmt)
call psb_bcast(ctxt,ipart)
call psb_barrier(ctxt)
t0 = psb_wtime()
! read the input matrix to be processed and (possibly) the rhs
nrhs = 1
if (iam==psb_root_) then
select case(psb_toupper(filefmt))
case('MM')
! For Matrix Market we have an input file for the matrix
! and an (optional) second file for the RHS.
call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
case ('HB')
! For Harwell-Boeing we have a single file which may or may not
! contain an RHS.
call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file)
case default
info = -1
write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt
end select
if (info /= 0) then
write(psb_err_unit,*) 'Error while reading input matrix '
call psb_abort(ctxt)
end if
!
! Always get nnz from original matrix.
! Some formats add fill-in and do not keep track
! of how many were added. So if the original matrix
! contained some extra zeros, the count of entries
! is not recoverable exactly.
!
nrt = aux_a%get_nrows()
annz = aux_a%get_nzeros()
call psb_bcast(ctxt,annz)
call psb_bcast(ctxt,nrt)
write(psb_out_unit,'("Generating an rhs...")')
write(psb_out_unit,'(" ")')
call psb_realloc(nrt,1,aux_b,info)
if (info /= 0) then
call psb_errpush(4000,name)
goto 9999
endif
b_col_glob => aux_b(:,1)
do i=1, nrt
b_col_glob(i) = 1.d0
enddo
else
call psb_bcast(ctxt,annz)
call psb_bcast(ctxt,nrt)
end if
select case(psb_toupper(acfmt))
case('COO')
acmold => acoo
case('CSR')
acmold => acsr
case('ELL')
acmold => aell
case('HLL')
acmold => ahll
case('HDIA')
acmold => ahdia
case default
write(*,*) 'Unknown format defaulting to CSR'
acmold => acsr
end select
#ifdef HAVE_GPU
select case(psb_toupper(agfmt))
case('ELG')
agmold => aelg
case('HLG')
agmold => ahlg
case('CSRG')
agmold => acsrg
case('HYBG')
agmold => ahybg
case('HDIAG')
agmold => ahdiag
case default
write(*,*) 'Unknown format defaulting to HLG'
agmold => ahlg
end select
#endif
! switch over different partition types
if (ipart == 0) then
call psb_barrier(ctxt)
if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")')
allocate(ivg(nrt),ipv(np))
do i=1,nrt
call part_block(i,nrt,np,ipv,nv)
ivg(i) = ipv(1)
enddo
call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
else if (ipart == 2) then
if (iam==psb_root_) then
write(psb_out_unit,'("Partition type: graph")')
write(psb_out_unit,'(" ")')
! write(psb_err_unit,'("Build type: graph")')
call build_mtpart(aux_a,np)
endif
call psb_barrier(ctxt)
call distr_mtpart(psb_root_,ctxt)
call getv_mtpart(ivg)
call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
else
if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")')
call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block)
end if
call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_)
t2 = psb_wtime() - t0
call psb_amx(ctxt, t2)
if (iam==psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2
write(psb_out_unit,'(" ")')
end if
call a%cscnv(aux_a,info,mold=acoo)
tcnvcsr = 0
tcnvgpu = 0
nr = desc_a%get_local_rows()
nrg = desc_a%get_global_rows()
call psb_geall(x_col,desc_a,info)
do i=1, nr
call desc_a%l2g(i,ig,info)
call psb_geins(ione,(/ig/),(/(sone + (sone*ig)/nrg)/),x_col,desc_a,info)
end do
call psb_geasb(x_col,desc_a,info)
do j=1, ncnv
call aux_a%cscnv(a,info,mold=acoo)
call psb_barrier(ctxt)
t1 = psb_wtime()
call a%cscnv(info,mold=acmold)
t2 = psb_Wtime() -t1
call psb_amx(ctxt,t2)
tcnvcsr = tcnvcsr + t2
if (j==1) tcnvc1 = t2
xc1 = x_col%get_vect()
call xv%bld(xc1)
call psb_geasb(bv,desc_a,info,scratch=.true.)
#ifdef HAVE_GPU
call aux_a%cscnv(agpu,info,mold=acoo)
call xg%bld(xc1,mold=vmold)
call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
call psb_barrier(ctxt)
t1 = psb_wtime()
call agpu%cscnv(info,mold=agmold)
call psb_gpu_DeviceSync()
t2 = psb_Wtime() -t1
call psb_amx(ctxt,t2)
if (j==1) tcnvg1 = t2
tcnvgpu = tcnvgpu + t2
#endif
end do
call psb_barrier(ctxt)
t1 = psb_wtime()
do i=1,ntests
call psb_spmm(sone,a,xv,szero,bv,desc_a,info)
end do
call psb_barrier(ctxt)
t2 = psb_wtime() - t1
call psb_amx(ctxt,t2)
#ifdef HAVE_GPU
! FIXME: cache flush needed here
call psb_barrier(ctxt)
tt1 = psb_wtime()
do i=1,ntests
call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info)
if ((info /= 0).or.(psb_get_errstatus()/=0)) then
write(0,*) 'From 1 spmm',info,i,ntests
call psb_error()
stop
end if
end do
call psb_gpu_DeviceSync()
call psb_barrier(ctxt)
tt2 = psb_wtime() - tt1
call psb_amx(ctxt,tt2)
xc1 = bv%get_vect()
xc2 = bg%get_vect()
nr = desc_a%get_local_rows()
eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
call psb_amx(ctxt,eps)
if (iam==0) write(*,*) 'Max diff on xGPU',eps
call xg%sync()
! FIXME: cache flush needed here
call psb_barrier(ctxt)
gt1 = psb_wtime()
do i=1,ntests*ngpu
call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info)
if ((info /= 0).or.(psb_get_errstatus()/=0)) then
write(0,*) 'From 2 spmm',info,i,ntests
call psb_error()
stop
end if
end do
! For timing purposes we need to make sure all threads
! in the device are done.
call psb_gpu_DeviceSync()
call psb_barrier(ctxt)
gt2 = psb_wtime() - gt1
call psb_amx(ctxt,gt2)
call bg%sync()
xc1 = bv%get_vect()
xc2 = bg%get_vect()
call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info)
eps = psb_geamax(bv,desc_a,info)
call psb_amx(ctxt,t2)
nr = desc_a%get_local_rows()
eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
call psb_amx(ctxt,eps)
if (iam==0) write(*,*) 'Max diff on GPU',eps
#endif
amatsize = a%sizeof()
agmatsize = agpu%sizeof()
samatsize = amatsize
samatsize = samatsize/(1024*1024)
sgmatsize = agmatsize
sgmatsize = sgmatsize/(1024*1024)
descsize = psb_sizeof(desc_a)
call psb_sum(ctxt,samatsize)
call psb_sum(ctxt,sgmatsize)
call psb_sum(ctxt,descsize)
if (iam == psb_root_) then
write(psb_out_unit,'("Matrix: ",a)') mtrx_file
write(psb_out_unit,&
&'("Test on : ",i20," processors")') np
write(psb_out_unit,&
&'("Size of matrix : ",i20," ")') nrt
write(psb_out_unit,&
&'("Number of nonzeros : ",i20," ")') annz
write(psb_out_unit,&
&'("Memory occupation CPU (MBytes) : ",f20.2," ")') samatsize
write(psb_out_unit,&
&'("Memory occupation GPU (MBytes) : ",f20.2," ")') sgmatsize
write(psb_out_unit,&
&'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize
write(psb_out_unit,&
&'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize
flops = ntests*(2.d0*annz)
tflops = flops
gflops = flops * ngpu
write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt()
#ifdef HAVE_GPU
write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
& tcnvc1
write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
& tcnvcsr
write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
& tcnvcsr/ncnv
write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
& tcnvg1
write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
& tcnvgpu
write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
& tcnvgpu/ncnv
#endif
write(psb_out_unit,&
& '("Number of flops (",i0," prod) : ",F20.0," ")') &
& ntests,flops
flops = flops / (t2)
tflops = tflops / (tt2)
gflops = gflops / (gt2)
write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')&
& ntests,t2
write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')&
& t2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')&
& flops/1.d6
#ifdef HAVE_GPU
write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')&
& ntests, tt2
write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')&
& tt2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')&
& tflops/1.d6
write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')&
& ngpu*ntests,gt2
write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')&
& gt2*1.d3/(1.d0*ntests*ngpu)
write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')&
& gflops/1.d6
#endif
!
! This computation assumes the data movement associated with CSR:
! it is minimal in terms of coefficients. Other formats may either move
! more data (padding etc.) or less data (if they can save on the indices).
!
nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+&
& annz*(psb_sizeof_sp + psb_sizeof_ip)
bdwdth = ntests*nbytes/(t2*1.d6)
write(psb_out_unit,*)
write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth
#ifdef HAVE_GPU
bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth
#endif
write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
end if
call psb_gefree(b_col, desc_a,info)
call psb_gefree(x_col, desc_a,info)
call psb_gefree(xv, desc_a,info)
call psb_gefree(bv, desc_a,info)
call psb_spfree(a, desc_a,info)
#ifdef HAVE_GPU
call psb_gefree(xg, desc_a,info)
call psb_gefree(bg, desc_a,info)
call psb_spfree(agpu,desc_a,info)
call psb_gpu_exit()
#endif
call psb_cdfree(desc_a,info)
call psb_exit(ctxt)
stop
9999 continue
call psb_error(ctxt)
end program s_file_spmv

@ -1,491 +0,0 @@
!
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
program z_file_spmv
use psb_base_mod
use psb_util_mod
use psb_ext_mod
#ifdef HAVE_GPU
use psb_gpu_mod
#endif
use data_input
implicit none
! input parameters
character(len=200) :: mtrx_file
! sparse matrices
type(psb_zspmat_type) :: a, aux_a, agpu
! dense matrices
complex(psb_dpk_), allocatable, target :: aux_b(:,:), d(:)
complex(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:)
complex(psb_dpk_), pointer :: b_col_glob(:)
type(psb_z_vect_type) :: b_col, x_col, r_col
type(psb_z_vect_type) :: xg, bg, xv, bv
#ifdef HAVE_GPU
type(psb_z_vect_gpu) :: vmold
#endif
complex(psb_dpk_), allocatable :: xc1(:),xc2(:)
! communications data structure
type(psb_desc_type):: desc_a
type(psb_ctxt_type) :: ctxt
integer :: iam, np
integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes
real(psb_dpk_) :: damatsize, dgmatsize
complex(psb_dpk_) :: err, eps
character(len=5) :: acfmt, agfmt
character(len=20) :: name
character(len=2) :: filefmt
integer, parameter :: iunit=12
integer, parameter :: times=2000
integer, parameter :: ntests=200, ngpu=50, ncnv=20
type(psb_z_coo_sparse_mat), target :: acoo
type(psb_z_csr_sparse_mat), target :: acsr
type(psb_z_ell_sparse_mat), target :: aell
type(psb_z_hll_sparse_mat), target :: ahll
#ifdef HAVE_GPU
type(psb_z_elg_sparse_mat), target :: aelg
type(psb_z_csrg_sparse_mat), target :: acsrg
type(psb_z_hybg_sparse_mat), target :: ahybg
type(psb_z_hlg_sparse_mat), target :: ahlg
#endif
class(psb_z_base_sparse_mat), pointer :: acmold, agmold
! other variables
integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg
integer :: internal, m,ii,nnzero
real(psb_dpk_) :: t0,t1, t2, tprec, flops
real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,&
& tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
integer :: nrhs, nrow, n_row, dim, nv, ne
integer, allocatable :: ivg(:), ipv(:)
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
#ifdef HAVE_GPU
call psb_gpu_init(ctxt)
#endif
if (iam < 0) then
! This should not happen, but just in case
call psb_exit(ctxt)
stop
endif
name='file_spmv'
if(psb_get_errstatus() /= 0) goto 9999
info=psb_success_
call psb_set_errverbosity(2)
if (iam == psb_root_) then
write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
write(*,*) 'This is the ',trim(name),' sample program'
end if
#ifdef HAVE_GPU
write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName())
#endif
if (iam == 0) then
write(*,*) 'Matrix? '
call read_data(mtrx_file,psb_inp_unit)
write(*,*) 'file format'
call read_data(filefmt,psb_inp_unit)
write(*,*) 'CPU format'
call read_data(acfmt,psb_inp_unit)
write(*,*) 'GPU format'
call read_data(agfmt,psb_inp_unit)
write(*,*) 'distribution '
call read_data(ipart,psb_inp_unit)
write(*,*) 'Read all data, going on'
end if
call psb_bcast(ctxt,mtrx_file)
call psb_bcast(ctxt,filefmt)
call psb_bcast(ctxt,acfmt)
call psb_bcast(ctxt,agfmt)
call psb_bcast(ctxt,ipart)
call psb_barrier(ctxt)
t0 = psb_wtime()
! read the input matrix to be processed and (possibly) the rhs
nrhs = 1
if (iam==psb_root_) then
select case(psb_toupper(filefmt))
case('MM')
! For Matrix Market we have an input file for the matrix
! and an (optional) second file for the RHS.
call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
case ('HB')
! For Harwell-Boeing we have a single file which may or may not
! contain an RHS.
call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file)
case default
info = -1
write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt
end select
if (info /= 0) then
write(psb_err_unit,*) 'Error while reading input matrix '
call psb_abort(ctxt)
end if
!
! Always get nnz from original matrix.
! Some formats add fill-in and do not keep track
! of how many were added. So if the original matrix
! contained some extra zeros, the count of entries
! is not recoverable exactly.
!
nrt = aux_a%get_nrows()
annz = aux_a%get_nzeros()
call psb_bcast(ctxt,annz)
call psb_bcast(ctxt,nrt)
write(psb_out_unit,'("Generating an rhs...")')
write(psb_out_unit,'(" ")')
call psb_realloc(nrt,1,aux_b,info)
if (info /= 0) then
call psb_errpush(4000,name)
goto 9999
endif
b_col_glob => aux_b(:,1)
do i=1, nrt
b_col_glob(i) = 1.d0
enddo
else
call psb_bcast(ctxt,annz)
call psb_bcast(ctxt,nrt)
end if
select case(psb_toupper(acfmt))
case('COO')
acmold => acoo
case('CSR')
acmold => acsr
case('ELL')
acmold => aell
case('HLL')
acmold => ahll
case default
write(*,*) 'Unknown format defaulting to CSR'
acmold => acsr
end select
#ifdef HAVE_GPU
select case(psb_toupper(agfmt))
case('ELG')
agmold => aelg
case('HLG')
agmold => ahlg
case('CSRG')
agmold => acsrg
case('HYBG')
agmold => ahybg
case default
write(*,*) 'Unknown format defaulting to HLG'
agmold => ahlg
end select
#endif
! switch over different partition types
if (ipart == 0) then
call psb_barrier(ctxt)
if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")')
allocate(ivg(nrt),ipv(np))
do i=1,nrt
call part_block(i,nrt,np,ipv,nv)
ivg(i) = ipv(1)
enddo
call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
else if (ipart == 2) then
if (iam==psb_root_) then
write(psb_out_unit,'("Partition type: graph")')
write(psb_out_unit,'(" ")')
! write(psb_err_unit,'("Build type: graph")')
call build_mtpart(aux_a,np)
endif
call psb_barrier(ctxt)
call distr_mtpart(psb_root_,ctxt)
call getv_mtpart(ivg)
call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
else
if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")')
call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block)
end if
call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_)
t2 = psb_wtime() - t0
call psb_amx(ctxt, t2)
if (iam==psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2
write(psb_out_unit,'(" ")')
end if
call a%cscnv(aux_a,info,mold=acoo)
tcnvcsr = 0
tcnvgpu = 0
nr = desc_a%get_local_rows()
nrg = desc_a%get_global_rows()
call psb_geall(x_col,desc_a,info)
do i=1, nr
call desc_a%l2g(i,ig,info)
call psb_geins(ione,(/ig/),(/(zone + (zone*ig)/nrg)/),x_col,desc_a,info)
end do
call psb_geasb(x_col,desc_a,info)
do j=1, ncnv
call aux_a%cscnv(a,info,mold=acoo)
call psb_barrier(ctxt)
t1 = psb_wtime()
call a%cscnv(info,mold=acmold)
t2 = psb_Wtime() -t1
call psb_amx(ctxt,t2)
tcnvcsr = tcnvcsr + t2
if (j==1) tcnvc1 = t2
xc1 = x_col%get_vect()
call xv%bld(xc1)
call psb_geasb(bv,desc_a,info,scratch=.true.)
#ifdef HAVE_GPU
call aux_a%cscnv(agpu,info,mold=acoo)
call xg%bld(xc1,mold=vmold)
call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
call psb_barrier(ctxt)
t1 = psb_wtime()
call agpu%cscnv(info,mold=agmold)
call psb_gpu_DeviceSync()
t2 = psb_Wtime() -t1
call psb_amx(ctxt,t2)
if (j==1) tcnvg1 = t2
tcnvgpu = tcnvgpu + t2
#endif
end do
call psb_barrier(ctxt)
t1 = psb_wtime()
do i=1,ntests
call psb_spmm(zone,a,xv,zzero,bv,desc_a,info)
end do
call psb_barrier(ctxt)
t2 = psb_wtime() - t1
call psb_amx(ctxt,t2)
#ifdef HAVE_GPU
! FIXME: cache flush needed here
call psb_barrier(ctxt)
tt1 = psb_wtime()
do i=1,ntests
call psb_spmm(zone,agpu,xv,zzero,bg,desc_a,info)
if ((info /= 0).or.(psb_get_errstatus()/=0)) then
write(0,*) 'From 1 spmm',info,i,ntests
call psb_error()
stop
end if
end do
call psb_gpu_DeviceSync()
call psb_barrier(ctxt)
tt2 = psb_wtime() - tt1
call psb_amx(ctxt,tt2)
xc1 = bv%get_vect()
xc2 = bg%get_vect()
nr = desc_a%get_local_rows()
eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
call psb_amx(ctxt,eps)
if (iam==0) write(*,*) 'Max diff on xGPU',eps
call xg%sync()
! FIXME: cache flush needed here
call psb_barrier(ctxt)
gt1 = psb_wtime()
do i=1,ntests*ngpu
call psb_spmm(zone,agpu,xg,zzero,bg,desc_a,info)
if ((info /= 0).or.(psb_get_errstatus()/=0)) then
write(0,*) 'From 2 spmm',info,i,ntests
call psb_error()
stop
end if
end do
! For timing purposes we need to make sure all threads
! in the device are done.
call psb_gpu_DeviceSync()
call psb_barrier(ctxt)
gt2 = psb_wtime() - gt1
call psb_amx(ctxt,gt2)
call bg%sync()
xc1 = bv%get_vect()
xc2 = bg%get_vect()
call psb_geaxpby(-zone,bg,+zone,bv,desc_a,info)
eps = psb_geamax(bv,desc_a,info)
call psb_amx(ctxt,t2)
nr = desc_a%get_local_rows()
eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
call psb_amx(ctxt,eps)
if (iam==0) write(*,*) 'Max diff on GPU',eps
#endif
amatsize = a%sizeof()
agmatsize = agpu%sizeof()
damatsize = amatsize
damatsize = damatsize/(1024*1024)
dgmatsize = agmatsize
dgmatsize = dgmatsize/(1024*1024)
descsize = psb_sizeof(desc_a)
call psb_sum(ctxt,damatsize)
call psb_sum(ctxt,dgmatsize)
call psb_sum(ctxt,descsize)
if (iam == psb_root_) then
write(psb_out_unit,'("Matrix: ",a)') mtrx_file
write(psb_out_unit,&
&'("Test on : ",i20," processors")') np
write(psb_out_unit,&
&'("Size of matrix : ",i20," ")') nrt
write(psb_out_unit,&
&'("Number of nonzeros : ",i20," ")') annz
write(psb_out_unit,&
&'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize
write(psb_out_unit,&
&'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize
write(psb_out_unit,&
&'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize
write(psb_out_unit,&
&'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize
flops = ntests*(2.d0*annz)
tflops = flops
gflops = flops * ngpu
write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt()
#ifdef HAVE_GPU
write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
& tcnvc1
write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
& tcnvcsr
write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
& tcnvcsr/ncnv
write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
& tcnvg1
write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
& tcnvgpu
write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
& tcnvgpu/ncnv
#endif
write(psb_out_unit,&
& '("Number of flops (",i0," prod) : ",F20.0," ")') &
& ntests,flops
flops = flops / (t2)
tflops = tflops / (tt2)
gflops = gflops / (gt2)
write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')&
& ntests,t2
write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')&
& t2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')&
& flops/1.d6
#ifdef HAVE_GPU
write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')&
& ntests, tt2
write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')&
& tt2*1.d3/(1.d0*ntests)
write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')&
& tflops/1.d6
write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')&
& ngpu*ntests,gt2
write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')&
& gt2*1.d3/(1.d0*ntests*ngpu)
write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')&
& gflops/1.d6
#endif
!
! This computation assumes the data movement associated with CSR:
! it is minimal in terms of coefficients. Other formats may either move
! more data (padding etc.) or less data (if they can save on the indices).
!
nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+&
& annz*(2*psb_sizeof_dp + psb_sizeof_ip)
bdwdth = ntests*nbytes/(t2*1.d6)
write(psb_out_unit,*)
write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth
#ifdef HAVE_GPU
bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth
#endif
write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
end if
call psb_gefree(b_col, desc_a,info)
call psb_gefree(x_col, desc_a,info)
call psb_gefree(xv, desc_a,info)
call psb_gefree(bv, desc_a,info)
call psb_spfree(a, desc_a,info)
#ifdef HAVE_GPU
call psb_gefree(xg, desc_a,info)
call psb_gefree(bg, desc_a,info)
call psb_spfree(agpu,desc_a,info)
call psb_gpu_exit()
#endif
call psb_cdfree(desc_a,info)
call psb_exit(ctxt)
stop
9999 continue
call psb_error(ctxt)
end program z_file_spmv

@ -16,7 +16,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).
EXEDIR=./runs
all: runsd hello pingpong tsum tsum1
all: runsd hello pingpong
runsd:
(if test ! -d runs ; then mkdir runs; fi)
@ -28,13 +28,6 @@ hello: hello.o
pingpong: pingpong.o
$(FLINK) pingpong.o -o pingpong $(PSBLAS_LIB) $(LDLIBS)
/bin/mv pingpong $(EXEDIR)
tsum: tsum.o
$(FLINK) tsum.o -o tsum $(PSBLAS_LIB) $(LDLIBS)
/bin/mv tsum $(EXEDIR)
tsum1: tsum1.o
$(FLINK) tsum1.o -o tsum1 $(PSBLAS_LIB) $(LDLIBS)
/bin/mv tsum1 $(EXEDIR)
clean:

@ -26,7 +26,7 @@ int metis_PartGraphKway_C(idx_t *n, idx_t *ixadj, idx_t *iadj, idx_t *ivwg,
/* NULL,NULL,NULL,(idx_t *)nparts,NULL,NULL,NULL, */
/* &objval,(idx_t *)graphpart); */
res = METIS_PartGraphKway((idx_t*)n,(idx_t *)&ncon,(idx_t *)ixadj,(idx_t *)iadj,
NULL,NULL,NULL,(idx_t *)nparts,weights,NULL,options,
NULL,NULL,NULL,(idx_t *)nparts,(void *)weights,NULL,options,
&objval,(idx_t *)graphpart);
}
if (res == METIS_OK) {

Loading…
Cancel
Save