diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 38b740a7..3fe001c8 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_c_serial_mod end subroutine psi_caxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_c_serial_mod complex(psb_spk_), intent (inout) :: z(:) complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_cabgdxyz - end interface psi_abgdxyz + end subroutine psi_c_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index 1d65c5f6..a08263df 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_d_serial_mod end subroutine psi_daxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_d_serial_mod real(psb_dpk_), intent (inout) :: z(:) real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_dabgdxyz - end interface psi_abgdxyz + end subroutine psi_d_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index 
6f4e8c06..1f1bebd7 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_e_serial_mod end subroutine psi_eaxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_e_serial_mod integer(psb_epk_), intent (inout) :: z(:) integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_eabgdxyz - end interface psi_abgdxyz + end subroutine psi_e_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index ffa14059..770d3256 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_i2_serial_mod end subroutine psi_i2axpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_i2_serial_mod integer(psb_i2pk_), intent (inout) :: z(:) integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_i2abgdxyz - end interface psi_abgdxyz + end subroutine psi_i2_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index 5661fdbf..3583cccc 100644 --- 
a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_m_serial_mod end subroutine psi_maxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_m_serial_mod integer(psb_mpk_), intent (inout) :: z(:) integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_mabgdxyz - end interface psi_abgdxyz + end subroutine psi_m_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 5cc17d58..3e0c6d91 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_s_serial_mod end subroutine psi_saxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_s_serial_mod real(psb_spk_), intent (inout) :: z(:) real(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_sabgdxyz - end interface psi_abgdxyz + end subroutine psi_s_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index 8a3f053d..a8ea734e 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 
@@ -99,8 +99,8 @@ module psi_z_serial_mod end subroutine psi_zaxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_z_serial_mod complex(psb_dpk_), intent (inout) :: z(:) complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_zabgdxyz - end interface psi_abgdxyz + end subroutine psi_z_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index 7f7f937c..591dec09 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface - interface psb_abgdxyz - subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_c_vect_type, psb_cspmat_type @@ -154,8 +154,8 @@ module psb_c_psblas_mod complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_cabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_c_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_camax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index 12090956..b200bc8a 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_d_psblas_mod end subroutine psb_daxpby end 
interface - interface psb_abgdxyz - subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_d_vect_type, psb_dspmat_type @@ -154,8 +154,8 @@ module psb_d_psblas_mod real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_dabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_d_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_damax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index 7a7ce783..a60da025 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface - interface psb_abgdxyz - subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_s_vect_type, psb_sspmat_type @@ -154,8 +154,8 @@ module psb_s_psblas_mod real(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_sabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_s_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_samax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index bcfe9caa..241df2b9 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface - interface psb_abgdxyz - subroutine 
psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_z_vect_type, psb_zspmat_type @@ -154,8 +154,8 @@ module psb_z_psblas_mod complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_zabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_z_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index 41bab5ab..4dac86d6 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => c_base_abgdxyz + procedure, pass(z) :: upd_xyz => c_base_upd_xyz procedure, pass(w) :: xyzw => c_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine c_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_c_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*z !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! 
- subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=czero)) call x%sync() if (y%is_dev().and.(beta/=czero)) call y%sync() if (z%is_dev().and.(delta/=czero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine c_base_abgdxyz + end subroutine c_base_upd_xyz subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 865f9456..1e9510f2 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => c_vect_abgdxyz + procedure, pass(z) :: upd_xyz => c_vect_upd_xyz procedure, pass(z) :: xyzw => c_vect_xyzw procedure, pass(y) :: mlt_v => c_vect_mlt_v @@ -774,7 +774,7 @@ contains end subroutine c_vect_axpby_a2 - subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -785,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine c_vect_abgdxyz + end subroutine c_vect_upd_xyz subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 
b/base/modules/serial/psb_d_base_vect_mod.F90 index 1ad1ffa5..f07b5aed 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => d_base_abgdxyz + procedure, pass(z) :: upd_xyz => d_base_upd_xyz procedure, pass(w) :: xyzw => d_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine d_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_d_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*z !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! 
- subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=dzero)) call x%sync() if (y%is_dev().and.(beta/=dzero)) call y%sync() if (z%is_dev().and.(delta/=dzero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine d_base_abgdxyz + end subroutine d_base_upd_xyz subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index 55dd8230..ae3062dd 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => d_vect_abgdxyz + procedure, pass(z) :: upd_xyz => d_vect_upd_xyz procedure, pass(z) :: xyzw => d_vect_xyzw procedure, pass(y) :: mlt_v => d_vect_mlt_v @@ -781,7 +781,7 @@ contains end subroutine d_vect_axpby_a2 - subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -792,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine d_vect_abgdxyz + end subroutine d_vect_upd_xyz subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 
b/base/modules/serial/psb_s_base_vect_mod.F90 index 26b82c31..596cd634 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => s_base_abgdxyz + procedure, pass(z) :: upd_xyz => s_base_upd_xyz procedure, pass(w) :: xyzw => s_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine s_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_s_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*z !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! 
- subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=szero)) call x%sync() if (y%is_dev().and.(beta/=szero)) call y%sync() if (z%is_dev().and.(delta/=szero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine s_base_abgdxyz + end subroutine s_base_upd_xyz subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index a50b2a0a..cad4659c 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => s_vect_abgdxyz + procedure, pass(z) :: upd_xyz => s_vect_upd_xyz procedure, pass(z) :: xyzw => s_vect_xyzw procedure, pass(y) :: mlt_v => s_vect_mlt_v @@ -781,7 +781,7 @@ contains end subroutine s_vect_axpby_a2 - subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -792,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine s_vect_abgdxyz + end subroutine s_vect_upd_xyz subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 
b/base/modules/serial/psb_z_base_vect_mod.F90 index a3afc9c1..1bbdfba1 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => z_base_abgdxyz + procedure, pass(z) :: upd_xyz => z_base_upd_xyz procedure, pass(w) :: xyzw => z_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine z_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_z_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*z !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! 
- subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=zzero)) call x%sync() if (y%is_dev().and.(beta/=zzero)) call y%sync() if (z%is_dev().and.(delta/=zzero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine z_base_abgdxyz + end subroutine z_base_upd_xyz subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 21e0c546..48f2e947 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => z_vect_abgdxyz + procedure, pass(z) :: upd_xyz => z_vect_upd_xyz procedure, pass(z) :: xyzw => z_vect_xyzw procedure, pass(y) :: mlt_v => z_vect_mlt_v @@ -774,7 +774,7 @@ contains end subroutine z_vect_axpby_a2 - subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -785,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine z_vect_abgdxyz + end subroutine z_vect_upd_xyz subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index 
7c22bb06..5d80ef00 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info) end subroutine psb_caddconst_vect -subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect implicit none type(psb_c_vect_type), intent (inout) :: x type(psb_c_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_cabgdxyz_vect +end subroutine psb_c_upd_xyz_vect diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 1de77647..38ebe465 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info) end subroutine psb_daddconst_vect -subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect implicit none type(psb_d_vect_type), intent (inout) :: x type(psb_d_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, 
y, z,& return -end subroutine psb_dabgdxyz_vect +end subroutine psb_d_upd_xyz_vect diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 1b1f24e6..0055fdbe 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info) end subroutine psb_saddconst_vect -subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect implicit none type(psb_s_vect_type), intent (inout) :: x type(psb_s_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_sabgdxyz_vect +end subroutine psb_s_upd_xyz_vect diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 0f37a1f4..e93488e3 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info) end subroutine psb_zaddconst_vect -subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect implicit none type(psb_z_vect_type), intent (inout) :: x type(psb_z_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call 
z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_zabgdxyz_vect +end subroutine psb_z_upd_xyz_vect diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index e230a1e0..e3f1d9a3 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine caxpbyv2 -subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='cabgdxyz' + name='c_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_cabgdxyz +end subroutine psi_c_upd_xyz subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='cabgdxyz' + name='c_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index bf1b2917..d6a9a31d 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine daxpbyv2 -subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_dabgdxyz(m,alpha, 
beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='dabgdxyz' + name='d_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_dabgdxyz +end subroutine psi_d_upd_xyz subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='dabgdxyz' + name='d_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 911ab4ec..37b11a94 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine eaxpbyv2 -subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='eabgdxyz' + name='e_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_eabgdxyz +end subroutine psi_e_upd_xyz subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='eabgdxyz' + name='e_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index fb42dfcd..c20cd60b 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ 
b/base/serial/psi_i2_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine i2axpbyv2 -subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2abgdxyz' + name='i2_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_i2abgdxyz +end subroutine psi_i2_upd_xyz subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2abgdxyz' + name='i2_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 346fd897..55913a16 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine maxpbyv2 -subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='mabgdxyz' + name='m_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_mabgdxyz +end subroutine psi_m_upd_xyz subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, 
z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='mabgdxyz' + name='m_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index 52f86bcd..c3846c8e 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine saxpbyv2 -subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='sabgdxyz' + name='s_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_sabgdxyz +end subroutine psi_s_upd_xyz subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='sabgdxyz' + name='s_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 7e680273..763eae22 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine zaxpbyv2 -subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_zabgdxyz(m,alpha, 
beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='zabgdxyz' + name='z_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_zabgdxyz +end subroutine psi_z_upd_xyz subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='zabgdxyz' + name='z_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index cdfda481..65d41893 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -255,7 +255,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe return(i); } -int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, +int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, cuFloatComplex gamma, cuFloatComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; @@ -268,7 +268,7 @@ int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; - spgpuCabgdxyz(handle,n, alpha,beta,gamma,delta, + spgpuCupd_xyz(handle,n, alpha,beta,gamma,delta, (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,(cuFloatComplex *) devVecZ->v_); return(i); } diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h index 62693e27..8c40b95d 100644 --- a/cuda/cvectordev.h +++ b/cuda/cvectordev.h @@ -69,7 +69,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* 
devVecY); -int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, +int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, cuFloatComplex gamma, cuFloatComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b, diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 723f48d8..a69e1b71 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, return(i); } -int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, +int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; int pitch = 0; @@ -253,7 +253,7 @@ int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, do if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; - spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta, + spgpuDupd_xyz(handle,n, alpha,beta,gamma,delta, (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_); return(i); } diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h index c2bfa1b5..3834c0d3 100644 --- a/cuda/dvectordev.h +++ b/cuda/dvectordev.h @@ -67,7 +67,7 @@ int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA); int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); -int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, +int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, void* devMultiVecX, void* 
devMultiVecY, diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 2c2a4f61..45fafe0a 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod procedure, pass(x) :: dot_a => c_cuda_dot_a procedure, pass(y) :: axpby_v => c_cuda_axpby_v procedure, pass(y) :: axpby_a => c_cuda_axpby_a - procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz procedure, pass(y) :: mlt_v => c_cuda_mlt_v procedure, pass(y) :: mlt_a => c_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine c_cuda_axpby_v - subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx d_cuda_dot_a procedure, pass(y) :: axpby_v => d_cuda_axpby_v procedure, pass(y) :: axpby_a => d_cuda_axpby_a - procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz procedure, pass(y) :: mlt_v => d_cuda_mlt_v procedure, pass(y) :: mlt_a => d_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine d_cuda_axpby_v - subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx s_cuda_dot_a procedure, pass(y) :: axpby_v => s_cuda_axpby_v procedure, pass(y) :: axpby_a => s_cuda_axpby_a - procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz procedure, pass(y) :: mlt_v => s_cuda_mlt_v procedure, pass(y) :: mlt_a => s_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine s_cuda_axpby_v - subroutine 
s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx z_cuda_dot_a procedure, pass(y) :: axpby_v => z_cuda_axpby_v procedure, pass(y) :: axpby_a => z_cuda_axpby_a - procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz procedure, pass(y) :: mlt_v => z_cuda_mlt_v procedure, pass(y) :: mlt_a => z_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine z_cuda_axpby_v - subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nxcurrentStream>>>(n, alpha, beta, gamma, delta, + spgpuCupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, x, y, z); } diff --git a/cuda/spgpu/kernels/dabgdxyz.cu b/cuda/spgpu/kernels/dupd_xyz.cu similarity index 88% rename from cuda/spgpu/kernels/dabgdxyz.cu rename to cuda/spgpu/kernels/dupd_xyz.cu index f2b18e02..38957fe1 100644 --- a/cuda/spgpu/kernels/dabgdxyz.cu +++ b/cuda/spgpu/kernels/dupd_xyz.cu @@ -31,7 +31,7 @@ extern "C" #define BLOCK_SIZE 512 -__global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma, double delta, +__global__ void spgpuDupd_xyz_krn(int n, double alpha, double beta, double gamma, double delta, double* x, double *y, double *z) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -54,7 +54,7 @@ __global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma } -void spgpuDabgdxyz(spgpuHandle_t handle, +void spgpuDupd_xyz(spgpuHandle_t handle, int n, double alpha, double beta, @@ -73,7 +73,7 @@ void spgpuDabgdxyz(spgpuHandle_t handle, num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); - spgpuDabgdxyz_krn<<currentStream>>>(n, 
alpha, beta, gamma, delta, + spgpuDupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, x, y, z); } diff --git a/cuda/spgpu/kernels/sabgdxyz.cu b/cuda/spgpu/kernels/supd_xyz.cu similarity index 88% rename from cuda/spgpu/kernels/sabgdxyz.cu rename to cuda/spgpu/kernels/supd_xyz.cu index 8c137ed3..d4ad1d0e 100644 --- a/cuda/spgpu/kernels/sabgdxyz.cu +++ b/cuda/spgpu/kernels/supd_xyz.cu @@ -31,7 +31,7 @@ extern "C" #define BLOCK_SIZE 512 -__global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, float delta, +__global__ void spgpuSupd_xyz_krn(int n, float alpha, float beta, float gamma, float delta, float* x, float *y, float *z) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -54,7 +54,7 @@ __global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, f } -void spgpuSabgdxyz(spgpuHandle_t handle, +void spgpuSupd_xyz(spgpuHandle_t handle, int n, float alpha, float beta, @@ -73,7 +73,7 @@ void spgpuSabgdxyz(spgpuHandle_t handle, num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); - spgpuSabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + spgpuSupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, x, y, z); } diff --git a/cuda/spgpu/kernels/zabgdxyz.cu b/cuda/spgpu/kernels/zupd_xyz.cu similarity index 88% rename from cuda/spgpu/kernels/zabgdxyz.cu rename to cuda/spgpu/kernels/zupd_xyz.cu index 48def937..a3936ee9 100644 --- a/cuda/spgpu/kernels/zabgdxyz.cu +++ b/cuda/spgpu/kernels/zupd_xyz.cu @@ -31,7 +31,7 @@ extern "C" #define BLOCK_SIZE 512 -__global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta, +__global__ void spgpuZupd_xyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, cuDoubleComplex * x, cuDoubleComplex *y, cuDoubleComplex *z) { @@ -55,7 +55,7 @@ __global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex } -void spgpuZabgdxyz(spgpuHandle_t handle, +void spgpuZupd_xyz(spgpuHandle_t handle, 
int n, cuDoubleComplex alpha, cuDoubleComplex beta, @@ -74,7 +74,7 @@ void spgpuZabgdxyz(spgpuHandle_t handle, num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); - spgpuZabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + spgpuZupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, x, y, z); } diff --git a/cuda/spgpu/vector.h b/cuda/spgpu/vector.h index d08e6edd..26c3443d 100644 --- a/cuda/spgpu/vector.h +++ b/cuda/spgpu/vector.h @@ -182,7 +182,7 @@ void spgpuSaxpby(spgpuHandle_t handle, __device float* x); -void spgpuSabgdxyz(spgpuHandle_t handle, +void spgpuSupd_xyz(spgpuHandle_t handle, int n, float alpha, float beta, @@ -487,7 +487,7 @@ void spgpuDaxpby(spgpuHandle_t handle, __device double* x); -void spgpuDabgdxyz(spgpuHandle_t handle, +void spgpuDupd_xyz(spgpuHandle_t handle, int n, double alpha, double beta, @@ -789,7 +789,7 @@ void spgpuCaxpby(spgpuHandle_t handle, __device cuFloatComplex* x); -void spgpuCabgdxyz(spgpuHandle_t handle, +void spgpuCupd_xyz(spgpuHandle_t handle, int n, cuFloatComplex alpha, cuFloatComplex beta, @@ -1092,7 +1092,7 @@ void spgpuZaxpby(spgpuHandle_t handle, __device cuDoubleComplex* x); -void spgpuZabgdxyz(spgpuHandle_t handle, +void spgpuZupd_xyz(spgpuHandle_t handle, int n, cuDoubleComplex alpha, cuDoubleComplex beta, diff --git a/cuda/svectordev.c b/cuda/svectordev.c index bf7545bb..cfaef5ce 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -241,7 +241,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, return(i); } -int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, +int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; int pitch = 0; @@ -253,7 +253,7 @@ int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; - spgpuSabgdxyz(handle,n, 
alpha,beta,gamma,delta, + spgpuSupd_xyz(handle,n, alpha,beta,gamma,delta, (float*)devVecX->v_,(float*) devVecY->v_,(float*) devVecZ->v_); return(i); } diff --git a/cuda/svectordev.h b/cuda/svectordev.h index 363c0108..d5c85f78 100644 --- a/cuda/svectordev.h +++ b/cuda/svectordev.h @@ -67,7 +67,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA); int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); -int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, +int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f, void* devMultiVecX, void* devMultiVecY, diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index e9f0cec7..d7d88f1b 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -234,7 +234,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti return(i); } -int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, +int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; @@ -247,7 +247,7 @@ int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; - spgpuZabgdxyz(handle,n, alpha,beta,gamma,delta, + spgpuZupd_xyz(handle,n, alpha,beta,gamma,delta, (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_); return(i); } diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h index ae623bdb..e15802f0 100644 --- a/cuda/zvectordev.h +++ b/cuda/zvectordev.h @@ -77,7 +77,7 @@ int 
dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX, cuDoubleComplex beta, void* devVecY); -int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, +int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, diff --git a/krylov/psb_dcg.F90 b/krylov/psb_dcg.F90 index caebb712..669573be 100644 --- a/krylov/psb_dcg.F90 +++ b/krylov/psb_dcg.F90 @@ -129,8 +129,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& logical :: do_cond character(len=20) :: name character(len=*), parameter :: methdname='CG' - logical, parameter :: do_timings=.true. - integer(psb_ipk_), save :: cg_vect=-1, cg_mv=-1, cg_prec=-1 info = psb_success_ name = 'psb_dcg' @@ -151,12 +149,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_errpush(info,name) goto 9999 endif - if ((do_timings).and.(cg_vect==-1)) & - & cg_vect = psb_get_timer_idx("CG: vector ops ") - if ((do_timings).and.(cg_mv==-1)) & - & cg_mv = psb_get_timer_idx("CG: MV product") - if ((do_timings).and.(cg_prec==-1)) & - & cg_prec = psb_get_timer_idx("CG: preconditioner") mglob = desc_a%get_global_rows() @@ -227,21 +219,17 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& ! = ! = r0 = b-Ax0 ! 
= - if (do_timings) call psb_tic(cg_vect) if (itx>= itmax_) exit restart + it = 0 call psb_geaxpby(done,b,dzero,r,desc_a,info) - if (do_timings) call psb_toc(cg_vect) - if (do_timings) call psb_tic(cg_mv) if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) goto 9999 end if - if (do_timings) call psb_toc(cg_mv) - - if (do_timings) call psb_tic(cg_vect) + rho = dzero call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info) @@ -249,18 +237,13 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_errpush(psb_err_from_subroutine_non_,name) goto 9999 End If - if (do_timings) call psb_toc(cg_vect) - + iteration: do it = it + 1 itx = itx + 1 - if (do_timings) call psb_tic(cg_prec) - - call prec%apply(r,z,desc_a,info,work=aux) - if (do_timings) call psb_toc(cg_prec) - if (do_timings) call psb_tic(cg_vect) + call prec%apply(r,z,desc_a,info,work=aux) rho_old = rho rho = psb_gedot(r,z,desc_a,info) @@ -271,18 +254,13 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& if (debug_level >= psb_debug_ext_)& & write(debug_unit,*) me,' ',trim(name),& & ': CG Iteration breakdown rho' - if (do_timings) call psb_toc(cg_vect) exit iteration endif beta = rho/rho_old call psb_geaxpby(done,z,beta,p,desc_a,info) end if - if (do_timings) call psb_toc(cg_vect) - if (do_timings) call psb_tic(cg_mv) call psb_spmm(done,a,p,dzero,q,desc_a,info,work=aux) - if (do_timings) call psb_toc(cg_mv) - if (do_timings) call psb_tic(cg_vect) sigma = psb_gedot(p,q,desc_a,info) if (sigma == dzero) then if (debug_level >= psb_debug_ext_)& @@ -315,7 +293,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& end do iteration end do restart - if (do_timings) call psb_toc(cg_vect) if (do_cond) then if (me == psb_root_) then #if defined(HAVE_LAPACK) diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile index 5d938973..41cef197 100755 --- 
a/test/cudakern/Makefile +++ b/test/cudakern/Makefile @@ -16,48 +16,26 @@ LDLIBS=$(PSBGPULDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) -ZTOBJS=z_file_spmv.o data_input.o -CTOBJS=c_file_spmv.o data_input.o -DTOBJS=d_file_spmv.o data_input.o -STOBJS=s_file_spmv.o data_input.o -DPGOBJS=dpdegenmv.o -SPGOBJS=spdegenmv.o +DPGOBJS=dpdegenmv.o data_input.o +SPGOBJS=spdegenmv.o data_input.o EXEDIR=./runs -all: dir pgen file +all: dir pgen pgen: dpdegenmv spdegenmv -file: s_file_spmv c_file_spmv d_file_spmv z_file_spmv -dpdegenmv spdegenmv s_file_spmv c_file_spmv d_file_spmv z_file_spmv: dir +dpdegenmv spdegenmv: dir dir: (if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi) - -dpdegenmv: $(DPGOBJS) +dpdegenmv.o spdegenmv.o: data_input.o +dpdegenmv: $(DPGOBJS) $(FLINK) $(LOPT) $(DPGOBJS) -fopenmp -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) /bin/mv dpdegenmv $(EXEDIR) spdegenmv: $(SPGOBJS) $(FLINK) $(LOPT) $(SPGOBJS) -o spdegenmv $(PSBLAS_LIB) $(LDLIBS) /bin/mv spdegenmv $(EXEDIR) -z_file_spmv: $(ZTOBJS) - $(FLINK) $(LOPT) $(ZTOBJS) -o z_file_spmv $(PSBLAS_LIB) $(LDLIBS) - /bin/mv z_file_spmv $(EXEDIR) -c_file_spmv: $(CTOBJS) - $(FLINK) $(LOPT) $(CTOBJS) -o c_file_spmv $(PSBLAS_LIB) $(LDLIBS) - /bin/mv c_file_spmv $(EXEDIR) -d_file_spmv: $(DTOBJS) - $(FLINK) $(LOPT) $(DTOBJS) -o d_file_spmv $(PSBLAS_LIB) $(LDLIBS) - /bin/mv d_file_spmv $(EXEDIR) -s_file_spmv: $(STOBJS) - $(FLINK) $(LOPT) $(STOBJS) -o s_file_spmv $(PSBLAS_LIB) $(LDLIBS) - /bin/cp s_file_spmv $(EXEDIR) - -d_file_spmv.o s_file_spmv.o z_file_spmv.o c_file_spmv.o: data_input.o - clean: /bin/rm -f $(DTOBJS) $(STOBJS) $(DPGOBJS) $(SPGOBJS) $(ZTOBJS) $(CTOBJS) \ - $(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv \ - $(EXEDIR)/d_file_spmv $(EXEDIR)/s_file_spmv \ - $(EXEDIR)/z_file_spmv $(EXEDIR)/c_file_spmv + $(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv *mod lib: (cd ../../; make library) diff --git a/test/cudakern/c_file_spmv.F90 
b/test/cudakern/c_file_spmv.F90 deleted file mode 100644 index 2f9840ec..00000000 --- a/test/cudakern/c_file_spmv.F90 +++ /dev/null @@ -1,491 +0,0 @@ -! -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -program c_file_spmv - use psb_base_mod - use psb_util_mod - use psb_ext_mod -#ifdef HAVE_GPU - use psb_gpu_mod -#endif - use data_input - implicit none - - ! input parameters - character(len=200) :: mtrx_file - - ! 
sparse matrices - type(psb_cspmat_type) :: a, aux_a, agpu - - ! dense matrices - complex(psb_spk_), allocatable, target :: aux_b(:,:), d(:) - complex(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:) - complex(psb_spk_), pointer :: b_col_glob(:) - type(psb_c_vect_type) :: b_col, x_col, r_col - type(psb_c_vect_type) :: xg, bg, xv, bv -#ifdef HAVE_GPU - type(psb_c_vect_gpu) :: vmold -#endif - complex(psb_spk_), allocatable :: xc1(:),xc2(:) - ! communications data structure - type(psb_desc_type):: desc_a - - type(psb_ctxt_type) :: ctxt - integer :: iam, np - integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes - real(psb_spk_) :: damatsize, dgmatsize - complex(psb_spk_) :: err, eps - - character(len=5) :: acfmt, agfmt - character(len=20) :: name - character(len=2) :: filefmt - integer, parameter :: iunit=12 - integer, parameter :: times=2000 - integer, parameter :: ntests=200, ngpu=50, ncnv=20 - - type(psb_c_coo_sparse_mat), target :: acoo - type(psb_c_csr_sparse_mat), target :: acsr - type(psb_c_ell_sparse_mat), target :: aell - type(psb_c_hll_sparse_mat), target :: ahll -#ifdef HAVE_GPU - type(psb_c_elg_sparse_mat), target :: aelg - type(psb_c_csrg_sparse_mat), target :: acsrg - type(psb_c_hybg_sparse_mat), target :: ahybg - type(psb_c_hlg_sparse_mat), target :: ahlg -#endif - class(psb_c_base_sparse_mat), pointer :: acmold, agmold - ! other variables - integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg - integer :: internal, m,ii,nnzero - real(psb_dpk_) :: t0,t1, t2, tprec, flops - real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& - & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 - integer :: nrhs, nrow, n_row, dim, nv, ne - integer, allocatable :: ivg(:), ipv(:) - - - call psb_init(ctxt) - call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU - call psb_gpu_init(ctxt) -#endif - if (iam < 0) then - ! 
This should not happen, but just in case - call psb_exit(ctxt) - stop - endif - - - name='file_spmv' - if(psb_get_errstatus() /= 0) goto 9999 - info=psb_success_ - call psb_set_errverbosity(2) - if (iam == psb_root_) then - write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ - write(*,*) 'This is the ',trim(name),' sample program' - end if -#ifdef HAVE_GPU - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) -#endif - - if (iam == 0) then - write(*,*) 'Matrix? ' - call read_data(mtrx_file,psb_inp_unit) - write(*,*) 'file format' - call read_data(filefmt,psb_inp_unit) - write(*,*) 'CPU format' - call read_data(acfmt,psb_inp_unit) - write(*,*) 'GPU format' - call read_data(agfmt,psb_inp_unit) - write(*,*) 'distribution ' - call read_data(ipart,psb_inp_unit) - write(*,*) 'Read all data, going on' - end if - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,acfmt) - call psb_bcast(ctxt,agfmt) - call psb_bcast(ctxt,ipart) - call psb_barrier(ctxt) - t0 = psb_wtime() - ! read the input matrix to be processed and (possibly) the rhs - nrhs = 1 - - if (iam==psb_root_) then - select case(psb_toupper(filefmt)) - case('MM') - ! For Matrix Market we have an input file for the matrix - ! and an (optional) second file for the RHS. - call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case ('HB') - ! For Harwell-Boeing we have a single file which may or may not - ! contain an RHS. - call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case default - info = -1 - write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt - end select - if (info /= 0) then - write(psb_err_unit,*) 'Error while reading input matrix ' - call psb_abort(ctxt) - end if - - ! - ! Always get nnz from original matrix. - ! Some formats add fill-in and do not keep track - ! 
of how many were added. So if the original matrix - ! contained some extra zeros, the count of entries - ! is not recoverable exactly. - ! - nrt = aux_a%get_nrows() - annz = aux_a%get_nzeros() - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - write(psb_out_unit,'("Generating an rhs...")') - write(psb_out_unit,'(" ")') - call psb_realloc(nrt,1,aux_b,info) - if (info /= 0) then - call psb_errpush(4000,name) - goto 9999 - endif - - b_col_glob => aux_b(:,1) - do i=1, nrt - b_col_glob(i) = 1.d0 - enddo - - else - - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - end if - - - select case(psb_toupper(acfmt)) - case('COO') - acmold => acoo - case('CSR') - acmold => acsr - case('ELL') - acmold => aell - case('HLL') - acmold => ahll - case default - write(*,*) 'Unknown format defaulting to CSR' - acmold => acsr - end select - -#ifdef HAVE_GPU - select case(psb_toupper(agfmt)) - case('ELG') - agmold => aelg - case('HLG') - agmold => ahlg - case('CSRG') - agmold => acsrg - case('HYBG') - agmold => ahybg - case default - write(*,*) 'Unknown format defaulting to HLG' - agmold => ahlg - end select -#endif - - - ! switch over different partition types - if (ipart == 0) then - call psb_barrier(ctxt) - if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') - allocate(ivg(nrt),ipv(np)) - do i=1,nrt - call part_block(i,nrt,np,ipv,nv) - ivg(i) = ipv(1) - enddo - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else if (ipart == 2) then - if (iam==psb_root_) then - write(psb_out_unit,'("Partition type: graph")') - write(psb_out_unit,'(" ")') - ! 
write(psb_err_unit,'("Build type: graph")') - call build_mtpart(aux_a,np) - endif - call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) - call getv_mtpart(ivg) - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else - if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') - call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) - end if - - call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) - - t2 = psb_wtime() - t0 - - call psb_amx(ctxt, t2) - - if (iam==psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 - write(psb_out_unit,'(" ")') - end if - call a%cscnv(aux_a,info,mold=acoo) - tcnvcsr = 0 - tcnvgpu = 0 - nr = desc_a%get_local_rows() - nrg = desc_a%get_global_rows() - call psb_geall(x_col,desc_a,info) - do i=1, nr - call desc_a%l2g(i,ig,info) - call psb_geins(ione,(/ig/),(/(cone + (cone*ig)/nrg)/),x_col,desc_a,info) - end do - call psb_geasb(x_col,desc_a,info) - do j=1, ncnv - call aux_a%cscnv(a,info,mold=acoo) - call psb_barrier(ctxt) - t1 = psb_wtime() - call a%cscnv(info,mold=acmold) - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - tcnvcsr = tcnvcsr + t2 - if (j==1) tcnvc1 = t2 - xc1 = x_col%get_vect() - call xv%bld(xc1) - call psb_geasb(bv,desc_a,info,scratch=.true.) - -#ifdef HAVE_GPU - - call aux_a%cscnv(agpu,info,mold=acoo) - call xg%bld(xc1,mold=vmold) - call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) - call psb_barrier(ctxt) - t1 = psb_wtime() - call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - if (j==1) tcnvg1 = t2 - tcnvgpu = tcnvgpu + t2 -#endif - end do - - call psb_barrier(ctxt) - t1 = psb_wtime() - do i=1,ntests - call psb_spmm(cone,a,xv,czero,bv,desc_a,info) - end do - call psb_barrier(ctxt) - t2 = psb_wtime() - t1 - call psb_amx(ctxt,t2) - -#ifdef HAVE_GPU - ! 
FIXME: cache flush needed here - call psb_barrier(ctxt) - tt1 = psb_wtime() - do i=1,ntests - call psb_spmm(cone,agpu,xv,czero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 1 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - tt2 = psb_wtime() - tt1 - call psb_amx(ctxt,tt2) - xc1 = bv%get_vect() - xc2 = bg%get_vect() - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on xGPU',eps - - call xg%sync() - ! FIXME: cache flush needed here - - call psb_barrier(ctxt) - gt1 = psb_wtime() - do i=1,ntests*ngpu - call psb_spmm(cone,agpu,xg,czero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 2 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - ! For timing purposes we need to make sure all threads - ! in the device are done. - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - gt2 = psb_wtime() - gt1 - call psb_amx(ctxt,gt2) - call bg%sync() - xc1 = bv%get_vect() - xc2 = bg%get_vect() - call psb_geaxpby(-cone,bg,+cone,bv,desc_a,info) - eps = psb_geamax(bv,desc_a,info) - - call psb_amx(ctxt,t2) - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on GPU',eps -#endif - - - amatsize = a%sizeof() - agmatsize = agpu%sizeof() - damatsize = amatsize - damatsize = damatsize/(1024*1024) - dgmatsize = agmatsize - dgmatsize = dgmatsize/(1024*1024) - descsize = psb_sizeof(desc_a) - call psb_sum(ctxt,damatsize) - call psb_sum(ctxt,dgmatsize) - call psb_sum(ctxt,descsize) - - if (iam == psb_root_) then - write(psb_out_unit,'("Matrix: ",a)') mtrx_file - write(psb_out_unit,& - &'("Test on : ",i20," processors")') np - write(psb_out_unit,& - &'("Size of matrix : ",i20," ")') nrt - write(psb_out_unit,& - &'("Number of nonzeros : ",i20," ")') annz - write(psb_out_unit,& - 
&'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize - write(psb_out_unit,& - &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize - write(psb_out_unit,& - &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize - write(psb_out_unit,& - &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize - flops = ntests*(2.d0*annz) - tflops = flops - gflops = flops * ngpu - write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU - write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() - write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& - & tcnvc1 - write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& - & tcnvcsr - write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& - & tcnvcsr/ncnv - write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& - & tcnvg1 - write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& - & tcnvgpu - write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& - & tcnvgpu/ncnv - -#endif - write(psb_out_unit,& - & '("Number of flops (",i0," prod) : ",F20.0," ")') & - & ntests,flops - - flops = flops / (t2) - tflops = tflops / (tt2) - gflops = gflops / (gt2) - write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& - & ntests,t2 - write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& - & t2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& - & flops/1.d6 -#ifdef HAVE_GPU - - write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& - & ntests, tt2 - write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& - & tt2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& - & tflops/1.d6 - - write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& - & ngpu*ntests,gt2 - write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& - & gt2*1.d3/(1.d0*ntests*ngpu) - write(psb_out_unit,'("MFLOPS (GPU) : 
",F20.3)')& - & gflops/1.d6 -#endif - ! - ! This computation assumes the data movement associated with CSR: - ! it is minimal in terms of coefficients. Other formats may either move - ! more data (padding etc.) or less data (if they can save on the indices). - ! - nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& - & annz*(2*psb_sizeof_dp + psb_sizeof_ip) - bdwdth = ntests*nbytes/(t2*1.d6) - write(psb_out_unit,*) - write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU - bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) - write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth -#endif - write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() - write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize - - end if - - call psb_gefree(b_col, desc_a,info) - call psb_gefree(x_col, desc_a,info) - call psb_gefree(xv, desc_a,info) - call psb_gefree(bv, desc_a,info) - call psb_spfree(a, desc_a,info) -#ifdef HAVE_GPU - call psb_gefree(xg, desc_a,info) - call psb_gefree(bg, desc_a,info) - call psb_spfree(agpu,desc_a,info) - call psb_gpu_exit() -#endif - call psb_cdfree(desc_a,info) - - call psb_exit(ctxt) - stop - -9999 continue - call psb_error(ctxt) - -end program c_file_spmv - - - - - diff --git a/test/cudakern/d_file_spmv.F90 b/test/cudakern/d_file_spmv.F90 deleted file mode 100644 index 2bbc0bc4..00000000 --- a/test/cudakern/d_file_spmv.F90 +++ /dev/null @@ -1,496 +0,0 @@ -! -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! 
documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -program d_file_spmv - use psb_base_mod - use psb_util_mod - use psb_ext_mod -#ifdef HAVE_GPU - use psb_gpu_mod -#endif - use data_input - implicit none - - ! input parameters - character(len=200) :: mtrx_file - - ! sparse matrices - type(psb_dspmat_type) :: a, aux_a, agpu - - ! dense matrices - real(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) - real(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) - real(psb_dpk_), pointer :: b_col_glob(:) - type(psb_d_vect_type) :: b_col, x_col, r_col - type(psb_d_vect_type) :: xg, bg, xv, bv -#ifdef HAVE_GPU - type(psb_d_vect_gpu) :: vmold -#endif - real(psb_dpk_), allocatable :: xc1(:),xc2(:) - ! 
communications data structure - type(psb_desc_type):: desc_a - - type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: iam, np - integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes - real(psb_dpk_) :: err, eps, damatsize, dgmatsize - - character(len=5) :: acfmt, agfmt - character(len=20) :: name - character(len=2) :: filefmt - integer, parameter :: iunit=12 - integer, parameter :: times=2000 - integer, parameter :: ntests=200, ngpu=50, ncnv=20 - - type(psb_d_coo_sparse_mat), target :: acoo - type(psb_d_csr_sparse_mat), target :: acsr - type(psb_d_ell_sparse_mat), target :: aell - type(psb_d_hll_sparse_mat), target :: ahll - type(psb_d_hdia_sparse_mat), target :: ahdia -#ifdef HAVE_GPU - type(psb_d_elg_sparse_mat), target :: aelg - type(psb_d_csrg_sparse_mat), target :: acsrg - type(psb_d_hybg_sparse_mat), target :: ahybg - type(psb_d_hlg_sparse_mat), target :: ahlg - type(psb_d_hdiag_sparse_mat), target :: ahdiag -#endif - class(psb_d_base_sparse_mat), pointer :: acmold, agmold - ! other variables - integer(psb_lpk_) :: i,j,nrt, ns, nr, ig, nrg - integer(psb_ipk_) :: internal, m,ii,nnzero,info, ipart - real(psb_dpk_) :: t0,t1, t2, tprec, flops - real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& - & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 - integer :: nrhs, nrow, n_row, dim, nv, ne - integer, allocatable :: ivg(:), ipv(:) - - - call psb_init(ctxt) - call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU - call psb_gpu_init(ctxt) -#endif - if (iam < 0) then - ! 
This should not happen, but just in case - call psb_exit(ctxt) - stop - endif - - - name='file_spmv' - if(psb_get_errstatus() /= 0) goto 9999 - info=psb_success_ - call psb_set_errverbosity(2) - if (iam == psb_root_) then - write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ - write(*,*) 'This is the ',trim(name),' sample program' - end if -#ifdef HAVE_GPU - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) -#endif - - if (iam == 0) then - write(*,*) 'Matrix? ' - call read_data(mtrx_file,psb_inp_unit) - write(*,*) 'file format' - call read_data(filefmt,psb_inp_unit) - write(*,*) 'CPU format' - call read_data(acfmt,psb_inp_unit) - write(*,*) 'GPU format' - call read_data(agfmt,psb_inp_unit) - write(*,*) 'distribution ' - call read_data(ipart,psb_inp_unit) - write(*,*) 'Read all data, going on' - end if - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,acfmt) - call psb_bcast(ctxt,agfmt) - call psb_bcast(ctxt,ipart) - call psb_barrier(ctxt) - t0 = psb_wtime() - ! read the input matrix to be processed and (possibly) the rhs - nrhs = 1 - - if (iam==psb_root_) then - select case(psb_toupper(filefmt)) - case('MM') - ! For Matrix Market we have an input file for the matrix - ! and an (optional) second file for the RHS. - call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case ('HB') - ! For Harwell-Boeing we have a single file which may or may not - ! contain an RHS. - call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case default - info = -1 - write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt - end select - if (info /= 0) then - write(psb_err_unit,*) 'Error while reading input matrix ' - call psb_abort(ctxt) - end if - - ! - ! Always get nnz from original matrix. - ! Some formats add fill-in and do not keep track - ! 
of how many were added. So if the original matrix - ! contained some extra zeros, the count of entries - ! is not recoverable exactly. - ! - nrt = aux_a%get_nrows() - annz = aux_a%get_nzeros() - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - write(psb_out_unit,'("Generating an rhs...")') - write(psb_out_unit,'(" ")') - call psb_realloc(nrt,1,aux_b,info) - if (info /= 0) then - call psb_errpush(4000,name) - goto 9999 - endif - - b_col_glob => aux_b(:,1) - do i=1, nrt - b_col_glob(i) = 1.d0 - enddo - - else - - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - end if - - - select case(psb_toupper(acfmt)) - case('COO') - acmold => acoo - case('CSR') - acmold => acsr - case('ELL') - acmold => aell - case('HLL') - acmold => ahll - case('HDIA') - acmold => ahdia - case default - write(*,*) 'Unknown format defaulting to CSR' - acmold => acsr - end select - -#ifdef HAVE_GPU - select case(psb_toupper(agfmt)) - case('ELG') - agmold => aelg - case('HLG') - agmold => ahlg - case('CSRG') - agmold => acsrg - case('HYBG') - agmold => ahybg - case('HDIAG') - agmold => ahdiag - case default - write(*,*) 'Unknown format defaulting to HLG' - agmold => ahlg - end select -#endif - - - ! switch over different partition types - if (ipart == 0) then - call psb_barrier(ctxt) - if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') - allocate(ivg(nrt),ipv(np)) - do i=1,nrt - call part_block(i,nrt,np,ipv,nv) - ivg(i) = ipv(1) - enddo - call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) - else if (ipart == 2) then - if (iam==psb_root_) then - write(psb_out_unit,'("Partition type: graph")') - write(psb_out_unit,'(" ")') - ! 
write(psb_err_unit,'("Build type: graph")') - call build_mtpart(aux_a,np) - endif - call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) - call getv_mtpart(ivg) - call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) - else - if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') - call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) - end if - - call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) - - t2 = psb_wtime() - t0 - - call psb_amx(ctxt, t2) - - if (iam==psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 - write(psb_out_unit,'(" ")') - end if - call a%cscnv(aux_a,info,mold=acoo) - tcnvcsr = 0 - tcnvgpu = 0 - nr = desc_a%get_local_rows() - nrg = desc_a%get_global_rows() - call psb_geall(x_col,desc_a,info) - do i=1, nr - call desc_a%l2g(i,ig,info) - call psb_geins(ione,(/ig/),(/(done + (done*ig)/nrg)/),x_col,desc_a,info) - end do - call psb_geasb(x_col,desc_a,info) - do j=1, ncnv - call aux_a%cscnv(a,info,mold=acoo) - call psb_barrier(ctxt) - t1 = psb_wtime() - call a%cscnv(info,mold=acmold) - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - tcnvcsr = tcnvcsr + t2 - if (j==1) tcnvc1 = t2 - xc1 = x_col%get_vect() - call xv%bld(xc1) - call psb_geasb(bv,desc_a,info,scratch=.true.) - -#ifdef HAVE_GPU - - call aux_a%cscnv(agpu,info,mold=acoo) - call xg%bld(xc1,mold=vmold) - call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) - call psb_barrier(ctxt) - t1 = psb_wtime() - call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - if (j==1) tcnvg1 = t2 - tcnvgpu = tcnvgpu + t2 -#endif - end do - - call psb_barrier(ctxt) - t1 = psb_wtime() - do i=1,ntests - call psb_spmm(done,a,xv,dzero,bv,desc_a,info) - end do - call psb_barrier(ctxt) - t2 = psb_wtime() - t1 - call psb_amx(ctxt,t2) - -#ifdef HAVE_GPU - ! 
FIXME: cache flush needed here - call psb_barrier(ctxt) - tt1 = psb_wtime() - do i=1,ntests - call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 1 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - tt2 = psb_wtime() - tt1 - call psb_amx(ctxt,tt2) - xc1 = bv%get_vect() - xc2 = bg%get_vect() - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on xGPU',eps - - call xg%sync() - ! FIXME: cache flush needed here - - call psb_barrier(ctxt) - gt1 = psb_wtime() - do i=1,ntests*ngpu - call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 2 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - ! For timing purposes we need to make sure all threads - ! in the device are done. - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - gt2 = psb_wtime() - gt1 - call psb_amx(ctxt,gt2) - call bg%sync() - xc1 = bv%get_vect() - xc2 = bg%get_vect() - call psb_geaxpby(-done,bg,+done,bv,desc_a,info) - eps = psb_geamax(bv,desc_a,info) - - call psb_amx(ctxt,t2) - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on GPU',eps -#endif - - - amatsize = a%sizeof() - agmatsize = agpu%sizeof() - damatsize = amatsize - damatsize = damatsize/(1024*1024) - dgmatsize = agmatsize - dgmatsize = dgmatsize/(1024*1024) - descsize = psb_sizeof(desc_a) - call psb_sum(ctxt,damatsize) - call psb_sum(ctxt,dgmatsize) - call psb_sum(ctxt,descsize) - - if (iam == psb_root_) then - write(psb_out_unit,'("Matrix: ",a)') mtrx_file - write(psb_out_unit,& - &'("Test on : ",i20," processors")') np - write(psb_out_unit,& - &'("Size of matrix : ",i20," ")') nrt - write(psb_out_unit,& - &'("Number of nonzeros : ",i20," ")') annz - write(psb_out_unit,& - 
&'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize - write(psb_out_unit,& - &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize - write(psb_out_unit,& - &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize - write(psb_out_unit,& - &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize - flops = ntests*(2.d0*annz) - tflops = flops - gflops = flops * ngpu - write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU - write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() - write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& - & tcnvc1 - write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& - & tcnvcsr - write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& - & tcnvcsr/ncnv - write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& - & tcnvg1 - write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& - & tcnvgpu - write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& - & tcnvgpu/ncnv - -#endif - write(psb_out_unit,& - & '("Number of flops (",i0," prod) : ",F20.0," ")') & - & ntests,flops - - flops = flops / (t2) - tflops = tflops / (tt2) - gflops = gflops / (gt2) - write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& - & ntests,t2 - write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& - & t2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& - & flops/1.d6 -#ifdef HAVE_GPU - - write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& - & ntests, tt2 - write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& - & tt2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& - & tflops/1.d6 - - write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& - & ngpu*ntests,gt2 - write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& - & gt2*1.d3/(1.d0*ntests*ngpu) - write(psb_out_unit,'("MFLOPS (GPU) : 
",F20.3)')& - & gflops/1.d6 -#endif - ! - ! This computation assumes the data movement associated with CSR: - ! it is minimal in terms of coefficients. Other formats may either move - ! more data (padding etc.) or less data (if they can save on the indices). - ! - nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+& - & annz*(psb_sizeof_dp + psb_sizeof_ip) - bdwdth = ntests*nbytes/(t2*1.d6) - write(psb_out_unit,*) - write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU - bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) - write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth -#endif - write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() - write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize - - end if - - call psb_gefree(b_col, desc_a,info) - call psb_gefree(x_col, desc_a,info) - call psb_gefree(xv, desc_a,info) - call psb_gefree(bv, desc_a,info) - call psb_spfree(a, desc_a,info) -#ifdef HAVE_GPU - call psb_gefree(xg, desc_a,info) - call psb_gefree(bg, desc_a,info) - call psb_spfree(agpu,desc_a,info) - call psb_gpu_exit() -#endif - call psb_cdfree(desc_a,info) - - call psb_exit(ctxt) - stop - -9999 continue - call psb_error(ctxt) - -end program d_file_spmv - - - - - diff --git a/test/cudakern/s_file_spmv.F90 b/test/cudakern/s_file_spmv.F90 deleted file mode 100644 index 37a52717..00000000 --- a/test/cudakern/s_file_spmv.F90 +++ /dev/null @@ -1,496 +0,0 @@ -! -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! 
documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -program s_file_spmv - use psb_base_mod - use psb_util_mod - use psb_ext_mod -#ifdef HAVE_GPU - use psb_gpu_mod -#endif - use data_input - implicit none - - ! input parameters - character(len=200) :: mtrx_file - - ! sparse matrices - type(psb_sspmat_type) :: a, aux_a, agpu - - ! dense matrices - real(psb_spk_), allocatable, target :: aux_b(:,:), d(:) - real(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:) - real(psb_spk_), pointer :: b_col_glob(:) - type(psb_s_vect_type) :: b_col, x_col, r_col - type(psb_s_vect_type) :: xg, bg, xv, bv -#ifdef HAVE_GPU - type(psb_s_vect_gpu) :: vmold -#endif - real(psb_spk_), allocatable :: xc1(:),xc2(:) - ! 
communications data structure - type(psb_desc_type):: desc_a - - type(psb_ctxt_type) :: ctxt - integer :: iam, np - integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes - real(psb_spk_) :: err, eps, samatsize, sgmatsize - - character(len=5) :: acfmt, agfmt - character(len=20) :: name - character(len=2) :: filefmt - integer, parameter :: iunit=12 - integer, parameter :: times=2000 - integer, parameter :: ntests=200, ngpu=50, ncnv=20 - - type(psb_s_coo_sparse_mat), target :: acoo - type(psb_s_csr_sparse_mat), target :: acsr - type(psb_s_ell_sparse_mat), target :: aell - type(psb_s_hll_sparse_mat), target :: ahll - type(psb_s_hdia_sparse_mat), target :: ahdia -#ifdef HAVE_GPU - type(psb_s_elg_sparse_mat), target :: aelg - type(psb_s_csrg_sparse_mat), target :: acsrg - type(psb_s_hybg_sparse_mat), target :: ahybg - type(psb_s_hlg_sparse_mat), target :: ahlg - type(psb_s_hdiag_sparse_mat), target :: ahdiag -#endif - class(psb_s_base_sparse_mat), pointer :: acmold, agmold - ! other variables - integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg - integer :: internal, m,ii,nnzero - real(psb_dpk_) :: t0,t1, t2, tprec, flops - real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& - & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 - integer :: nrhs, nrow, n_row, dim, nv, ne - integer, allocatable :: ivg(:), ipv(:) - - - call psb_init(ctxt) - call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU - call psb_gpu_init(ctxt) -#endif - if (iam < 0) then - ! 
This should not happen, but just in case - call psb_exit(ctxt) - stop - endif - - - name='file_spmv' - if(psb_get_errstatus() /= 0) goto 9999 - info=psb_success_ - call psb_set_errverbosity(2) - if (iam == psb_root_) then - write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ - write(*,*) 'This is the ',trim(name),' sample program' - end if -#ifdef HAVE_GPU - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) -#endif - - if (iam == 0) then - write(*,*) 'Matrix? ' - call read_data(mtrx_file,psb_inp_unit) - write(*,*) 'file format' - call read_data(filefmt,psb_inp_unit) - write(*,*) 'CPU format' - call read_data(acfmt,psb_inp_unit) - write(*,*) 'GPU format' - call read_data(agfmt,psb_inp_unit) - write(*,*) 'distribution ' - call read_data(ipart,psb_inp_unit) - write(*,*) 'Read all data, going on' - end if - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,acfmt) - call psb_bcast(ctxt,agfmt) - call psb_bcast(ctxt,ipart) - call psb_barrier(ctxt) - t0 = psb_wtime() - ! read the input matrix to be processed and (possibly) the rhs - nrhs = 1 - - if (iam==psb_root_) then - select case(psb_toupper(filefmt)) - case('MM') - ! For Matrix Market we have an input file for the matrix - ! and an (optional) second file for the RHS. - call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case ('HB') - ! For Harwell-Boeing we have a single file which may or may not - ! contain an RHS. - call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case default - info = -1 - write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt - end select - if (info /= 0) then - write(psb_err_unit,*) 'Error while reading input matrix ' - call psb_abort(ctxt) - end if - - ! - ! Always get nnz from original matrix. - ! Some formats add fill-in and do not keep track - ! 
of how many were added. So if the original matrix - ! contained some extra zeros, the count of entries - ! is not recoverable exactly. - ! - nrt = aux_a%get_nrows() - annz = aux_a%get_nzeros() - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - write(psb_out_unit,'("Generating an rhs...")') - write(psb_out_unit,'(" ")') - call psb_realloc(nrt,1,aux_b,info) - if (info /= 0) then - call psb_errpush(4000,name) - goto 9999 - endif - - b_col_glob => aux_b(:,1) - do i=1, nrt - b_col_glob(i) = 1.d0 - enddo - - else - - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - end if - - - select case(psb_toupper(acfmt)) - case('COO') - acmold => acoo - case('CSR') - acmold => acsr - case('ELL') - acmold => aell - case('HLL') - acmold => ahll - case('HDIA') - acmold => ahdia - case default - write(*,*) 'Unknown format defaulting to CSR' - acmold => acsr - end select - -#ifdef HAVE_GPU - select case(psb_toupper(agfmt)) - case('ELG') - agmold => aelg - case('HLG') - agmold => ahlg - case('CSRG') - agmold => acsrg - case('HYBG') - agmold => ahybg - case('HDIAG') - agmold => ahdiag - case default - write(*,*) 'Unknown format defaulting to HLG' - agmold => ahlg - end select -#endif - - - ! switch over different partition types - if (ipart == 0) then - call psb_barrier(ctxt) - if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') - allocate(ivg(nrt),ipv(np)) - do i=1,nrt - call part_block(i,nrt,np,ipv,nv) - ivg(i) = ipv(1) - enddo - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else if (ipart == 2) then - if (iam==psb_root_) then - write(psb_out_unit,'("Partition type: graph")') - write(psb_out_unit,'(" ")') - ! 
write(psb_err_unit,'("Build type: graph")') - call build_mtpart(aux_a,np) - endif - call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) - call getv_mtpart(ivg) - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else - if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') - call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) - end if - - call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) - - t2 = psb_wtime() - t0 - - call psb_amx(ctxt, t2) - - if (iam==psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 - write(psb_out_unit,'(" ")') - end if - call a%cscnv(aux_a,info,mold=acoo) - tcnvcsr = 0 - tcnvgpu = 0 - nr = desc_a%get_local_rows() - nrg = desc_a%get_global_rows() - call psb_geall(x_col,desc_a,info) - do i=1, nr - call desc_a%l2g(i,ig,info) - call psb_geins(ione,(/ig/),(/(sone + (sone*ig)/nrg)/),x_col,desc_a,info) - end do - call psb_geasb(x_col,desc_a,info) - do j=1, ncnv - call aux_a%cscnv(a,info,mold=acoo) - call psb_barrier(ctxt) - t1 = psb_wtime() - call a%cscnv(info,mold=acmold) - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - tcnvcsr = tcnvcsr + t2 - if (j==1) tcnvc1 = t2 - xc1 = x_col%get_vect() - call xv%bld(xc1) - call psb_geasb(bv,desc_a,info,scratch=.true.) - -#ifdef HAVE_GPU - - call aux_a%cscnv(agpu,info,mold=acoo) - call xg%bld(xc1,mold=vmold) - call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) - call psb_barrier(ctxt) - t1 = psb_wtime() - call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - if (j==1) tcnvg1 = t2 - tcnvgpu = tcnvgpu + t2 -#endif - end do - - call psb_barrier(ctxt) - t1 = psb_wtime() - do i=1,ntests - call psb_spmm(sone,a,xv,szero,bv,desc_a,info) - end do - call psb_barrier(ctxt) - t2 = psb_wtime() - t1 - call psb_amx(ctxt,t2) - -#ifdef HAVE_GPU - ! 
FIXME: cache flush needed here - call psb_barrier(ctxt) - tt1 = psb_wtime() - do i=1,ntests - call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 1 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - tt2 = psb_wtime() - tt1 - call psb_amx(ctxt,tt2) - xc1 = bv%get_vect() - xc2 = bg%get_vect() - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on xGPU',eps - - call xg%sync() - ! FIXME: cache flush needed here - - call psb_barrier(ctxt) - gt1 = psb_wtime() - do i=1,ntests*ngpu - call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 2 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - ! For timing purposes we need to make sure all threads - ! in the device are done. - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - gt2 = psb_wtime() - gt1 - call psb_amx(ctxt,gt2) - call bg%sync() - xc1 = bv%get_vect() - xc2 = bg%get_vect() - call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info) - eps = psb_geamax(bv,desc_a,info) - - call psb_amx(ctxt,t2) - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on GPU',eps -#endif - - - amatsize = a%sizeof() - agmatsize = agpu%sizeof() - samatsize = amatsize - samatsize = samatsize/(1024*1024) - sgmatsize = agmatsize - sgmatsize = sgmatsize/(1024*1024) - descsize = psb_sizeof(desc_a) - call psb_sum(ctxt,samatsize) - call psb_sum(ctxt,sgmatsize) - call psb_sum(ctxt,descsize) - - if (iam == psb_root_) then - write(psb_out_unit,'("Matrix: ",a)') mtrx_file - write(psb_out_unit,& - &'("Test on : ",i20," processors")') np - write(psb_out_unit,& - &'("Size of matrix : ",i20," ")') nrt - write(psb_out_unit,& - &'("Number of nonzeros : ",i20," ")') annz - write(psb_out_unit,& - 
&'("Memory occupation CPU (MBytes) : ",f20.2," ")') samatsize - write(psb_out_unit,& - &'("Memory occupation GPU (MBytes) : ",f20.2," ")') sgmatsize - write(psb_out_unit,& - &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize - write(psb_out_unit,& - &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize - flops = ntests*(2.d0*annz) - tflops = flops - gflops = flops * ngpu - write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU - write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() - write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& - & tcnvc1 - write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& - & tcnvcsr - write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& - & tcnvcsr/ncnv - write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& - & tcnvg1 - write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& - & tcnvgpu - write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& - & tcnvgpu/ncnv - -#endif - write(psb_out_unit,& - & '("Number of flops (",i0," prod) : ",F20.0," ")') & - & ntests,flops - - flops = flops / (t2) - tflops = tflops / (tt2) - gflops = gflops / (gt2) - write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& - & ntests,t2 - write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& - & t2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& - & flops/1.d6 -#ifdef HAVE_GPU - - write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& - & ntests, tt2 - write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& - & tt2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& - & tflops/1.d6 - - write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& - & ngpu*ntests,gt2 - write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& - & gt2*1.d3/(1.d0*ntests*ngpu) - write(psb_out_unit,'("MFLOPS (GPU) : 
",F20.3)')& - & gflops/1.d6 -#endif - ! - ! This computation assumes the data movement associated with CSR: - ! it is minimal in terms of coefficients. Other formats may either move - ! more data (padding etc.) or less data (if they can save on the indices). - ! - nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+& - & annz*(psb_sizeof_sp + psb_sizeof_ip) - bdwdth = ntests*nbytes/(t2*1.d6) - write(psb_out_unit,*) - write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU - bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) - write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth -#endif - write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() - write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize - - end if - - call psb_gefree(b_col, desc_a,info) - call psb_gefree(x_col, desc_a,info) - call psb_gefree(xv, desc_a,info) - call psb_gefree(bv, desc_a,info) - call psb_spfree(a, desc_a,info) -#ifdef HAVE_GPU - call psb_gefree(xg, desc_a,info) - call psb_gefree(bg, desc_a,info) - call psb_spfree(agpu,desc_a,info) - call psb_gpu_exit() -#endif - call psb_cdfree(desc_a,info) - - call psb_exit(ctxt) - stop - -9999 continue - call psb_error(ctxt) - -end program s_file_spmv - - - - - diff --git a/test/cudakern/z_file_spmv.F90 b/test/cudakern/z_file_spmv.F90 deleted file mode 100644 index 153dd5e1..00000000 --- a/test/cudakern/z_file_spmv.F90 +++ /dev/null @@ -1,491 +0,0 @@ -! -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! 
documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -program z_file_spmv - use psb_base_mod - use psb_util_mod - use psb_ext_mod -#ifdef HAVE_GPU - use psb_gpu_mod -#endif - use data_input - implicit none - - ! input parameters - character(len=200) :: mtrx_file - - ! sparse matrices - type(psb_zspmat_type) :: a, aux_a, agpu - - ! dense matrices - complex(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) - complex(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) - complex(psb_dpk_), pointer :: b_col_glob(:) - type(psb_z_vect_type) :: b_col, x_col, r_col - type(psb_z_vect_type) :: xg, bg, xv, bv -#ifdef HAVE_GPU - type(psb_z_vect_gpu) :: vmold -#endif - complex(psb_dpk_), allocatable :: xc1(:),xc2(:) - ! 
communications data structure - type(psb_desc_type):: desc_a - - type(psb_ctxt_type) :: ctxt - integer :: iam, np - integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes - real(psb_dpk_) :: damatsize, dgmatsize - complex(psb_dpk_) :: err, eps - - character(len=5) :: acfmt, agfmt - character(len=20) :: name - character(len=2) :: filefmt - integer, parameter :: iunit=12 - integer, parameter :: times=2000 - integer, parameter :: ntests=200, ngpu=50, ncnv=20 - - type(psb_z_coo_sparse_mat), target :: acoo - type(psb_z_csr_sparse_mat), target :: acsr - type(psb_z_ell_sparse_mat), target :: aell - type(psb_z_hll_sparse_mat), target :: ahll -#ifdef HAVE_GPU - type(psb_z_elg_sparse_mat), target :: aelg - type(psb_z_csrg_sparse_mat), target :: acsrg - type(psb_z_hybg_sparse_mat), target :: ahybg - type(psb_z_hlg_sparse_mat), target :: ahlg -#endif - class(psb_z_base_sparse_mat), pointer :: acmold, agmold - ! other variables - integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg - integer :: internal, m,ii,nnzero - real(psb_dpk_) :: t0,t1, t2, tprec, flops - real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& - & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 - integer :: nrhs, nrow, n_row, dim, nv, ne - integer, allocatable :: ivg(:), ipv(:) - - - call psb_init(ctxt) - call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU - call psb_gpu_init(ctxt) -#endif - if (iam < 0) then - ! 
This should not happen, but just in case - call psb_exit(ctxt) - stop - endif - - - name='file_spmv' - if(psb_get_errstatus() /= 0) goto 9999 - info=psb_success_ - call psb_set_errverbosity(2) - if (iam == psb_root_) then - write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ - write(*,*) 'This is the ',trim(name),' sample program' - end if -#ifdef HAVE_GPU - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) -#endif - - if (iam == 0) then - write(*,*) 'Matrix? ' - call read_data(mtrx_file,psb_inp_unit) - write(*,*) 'file format' - call read_data(filefmt,psb_inp_unit) - write(*,*) 'CPU format' - call read_data(acfmt,psb_inp_unit) - write(*,*) 'GPU format' - call read_data(agfmt,psb_inp_unit) - write(*,*) 'distribution ' - call read_data(ipart,psb_inp_unit) - write(*,*) 'Read all data, going on' - end if - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,acfmt) - call psb_bcast(ctxt,agfmt) - call psb_bcast(ctxt,ipart) - call psb_barrier(ctxt) - t0 = psb_wtime() - ! read the input matrix to be processed and (possibly) the rhs - nrhs = 1 - - if (iam==psb_root_) then - select case(psb_toupper(filefmt)) - case('MM') - ! For Matrix Market we have an input file for the matrix - ! and an (optional) second file for the RHS. - call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case ('HB') - ! For Harwell-Boeing we have a single file which may or may not - ! contain an RHS. - call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case default - info = -1 - write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt - end select - if (info /= 0) then - write(psb_err_unit,*) 'Error while reading input matrix ' - call psb_abort(ctxt) - end if - - ! - ! Always get nnz from original matrix. - ! Some formats add fill-in and do not keep track - ! 
of how many were added. So if the original matrix - ! contained some extra zeros, the count of entries - ! is not recoverable exactly. - ! - nrt = aux_a%get_nrows() - annz = aux_a%get_nzeros() - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - write(psb_out_unit,'("Generating an rhs...")') - write(psb_out_unit,'(" ")') - call psb_realloc(nrt,1,aux_b,info) - if (info /= 0) then - call psb_errpush(4000,name) - goto 9999 - endif - - b_col_glob => aux_b(:,1) - do i=1, nrt - b_col_glob(i) = 1.d0 - enddo - - else - - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - end if - - - select case(psb_toupper(acfmt)) - case('COO') - acmold => acoo - case('CSR') - acmold => acsr - case('ELL') - acmold => aell - case('HLL') - acmold => ahll - case default - write(*,*) 'Unknown format defaulting to CSR' - acmold => acsr - end select - -#ifdef HAVE_GPU - select case(psb_toupper(agfmt)) - case('ELG') - agmold => aelg - case('HLG') - agmold => ahlg - case('CSRG') - agmold => acsrg - case('HYBG') - agmold => ahybg - case default - write(*,*) 'Unknown format defaulting to HLG' - agmold => ahlg - end select -#endif - - - ! switch over different partition types - if (ipart == 0) then - call psb_barrier(ctxt) - if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') - allocate(ivg(nrt),ipv(np)) - do i=1,nrt - call part_block(i,nrt,np,ipv,nv) - ivg(i) = ipv(1) - enddo - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else if (ipart == 2) then - if (iam==psb_root_) then - write(psb_out_unit,'("Partition type: graph")') - write(psb_out_unit,'(" ")') - ! 
write(psb_err_unit,'("Build type: graph")') - call build_mtpart(aux_a,np) - endif - call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) - call getv_mtpart(ivg) - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else - if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') - call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) - end if - - call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) - - t2 = psb_wtime() - t0 - - call psb_amx(ctxt, t2) - - if (iam==psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 - write(psb_out_unit,'(" ")') - end if - call a%cscnv(aux_a,info,mold=acoo) - tcnvcsr = 0 - tcnvgpu = 0 - nr = desc_a%get_local_rows() - nrg = desc_a%get_global_rows() - call psb_geall(x_col,desc_a,info) - do i=1, nr - call desc_a%l2g(i,ig,info) - call psb_geins(ione,(/ig/),(/(zone + (zone*ig)/nrg)/),x_col,desc_a,info) - end do - call psb_geasb(x_col,desc_a,info) - do j=1, ncnv - call aux_a%cscnv(a,info,mold=acoo) - call psb_barrier(ctxt) - t1 = psb_wtime() - call a%cscnv(info,mold=acmold) - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - tcnvcsr = tcnvcsr + t2 - if (j==1) tcnvc1 = t2 - xc1 = x_col%get_vect() - call xv%bld(xc1) - call psb_geasb(bv,desc_a,info,scratch=.true.) - -#ifdef HAVE_GPU - - call aux_a%cscnv(agpu,info,mold=acoo) - call xg%bld(xc1,mold=vmold) - call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) - call psb_barrier(ctxt) - t1 = psb_wtime() - call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - if (j==1) tcnvg1 = t2 - tcnvgpu = tcnvgpu + t2 -#endif - end do - - call psb_barrier(ctxt) - t1 = psb_wtime() - do i=1,ntests - call psb_spmm(zone,a,xv,zzero,bv,desc_a,info) - end do - call psb_barrier(ctxt) - t2 = psb_wtime() - t1 - call psb_amx(ctxt,t2) - -#ifdef HAVE_GPU - ! 
FIXME: cache flush needed here - call psb_barrier(ctxt) - tt1 = psb_wtime() - do i=1,ntests - call psb_spmm(zone,agpu,xv,zzero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 1 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - tt2 = psb_wtime() - tt1 - call psb_amx(ctxt,tt2) - xc1 = bv%get_vect() - xc2 = bg%get_vect() - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on xGPU',eps - - call xg%sync() - ! FIXME: cache flush needed here - - call psb_barrier(ctxt) - gt1 = psb_wtime() - do i=1,ntests*ngpu - call psb_spmm(zone,agpu,xg,zzero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 2 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - ! For timing purposes we need to make sure all threads - ! in the device are done. - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - gt2 = psb_wtime() - gt1 - call psb_amx(ctxt,gt2) - call bg%sync() - xc1 = bv%get_vect() - xc2 = bg%get_vect() - call psb_geaxpby(-zone,bg,+zone,bv,desc_a,info) - eps = psb_geamax(bv,desc_a,info) - - call psb_amx(ctxt,t2) - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on GPU',eps -#endif - - - amatsize = a%sizeof() - agmatsize = agpu%sizeof() - damatsize = amatsize - damatsize = damatsize/(1024*1024) - dgmatsize = agmatsize - dgmatsize = dgmatsize/(1024*1024) - descsize = psb_sizeof(desc_a) - call psb_sum(ctxt,damatsize) - call psb_sum(ctxt,dgmatsize) - call psb_sum(ctxt,descsize) - - if (iam == psb_root_) then - write(psb_out_unit,'("Matrix: ",a)') mtrx_file - write(psb_out_unit,& - &'("Test on : ",i20," processors")') np - write(psb_out_unit,& - &'("Size of matrix : ",i20," ")') nrt - write(psb_out_unit,& - &'("Number of nonzeros : ",i20," ")') annz - write(psb_out_unit,& - 
&'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize - write(psb_out_unit,& - &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize - write(psb_out_unit,& - &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize - write(psb_out_unit,& - &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize - flops = ntests*(2.d0*annz) - tflops = flops - gflops = flops * ngpu - write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU - write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() - write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& - & tcnvc1 - write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& - & tcnvcsr - write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& - & tcnvcsr/ncnv - write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& - & tcnvg1 - write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& - & tcnvgpu - write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& - & tcnvgpu/ncnv - -#endif - write(psb_out_unit,& - & '("Number of flops (",i0," prod) : ",F20.0," ")') & - & ntests,flops - - flops = flops / (t2) - tflops = tflops / (tt2) - gflops = gflops / (gt2) - write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& - & ntests,t2 - write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& - & t2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& - & flops/1.d6 -#ifdef HAVE_GPU - - write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& - & ntests, tt2 - write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& - & tt2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& - & tflops/1.d6 - - write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& - & ngpu*ntests,gt2 - write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& - & gt2*1.d3/(1.d0*ntests*ngpu) - write(psb_out_unit,'("MFLOPS (GPU) : 
",F20.3)')& - & gflops/1.d6 -#endif - ! - ! This computation assumes the data movement associated with CSR: - ! it is minimal in terms of coefficients. Other formats may either move - ! more data (padding etc.) or less data (if they can save on the indices). - ! - nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& - & annz*(2*psb_sizeof_dp + psb_sizeof_ip) - bdwdth = ntests*nbytes/(t2*1.d6) - write(psb_out_unit,*) - write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU - bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) - write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth -#endif - write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() - write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize - - end if - - call psb_gefree(b_col, desc_a,info) - call psb_gefree(x_col, desc_a,info) - call psb_gefree(xv, desc_a,info) - call psb_gefree(bv, desc_a,info) - call psb_spfree(a, desc_a,info) -#ifdef HAVE_GPU - call psb_gefree(xg, desc_a,info) - call psb_gefree(bg, desc_a,info) - call psb_spfree(agpu,desc_a,info) - call psb_gpu_exit() -#endif - call psb_cdfree(desc_a,info) - - call psb_exit(ctxt) - stop - -9999 continue - call psb_error(ctxt) - -end program z_file_spmv - - - - -