From 00cc83cde8d03e85539ee06fbb3873ab80357a4f Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 18 Jan 2023 05:04:16 -0500 Subject: [PATCH 001/110] First version of AD/AND with memory duplication --- base/modules/serial/psb_c_mat_mod.F90 | 1 + base/modules/serial/psb_d_mat_mod.F90 | 1 + base/modules/serial/psb_s_mat_mod.F90 | 1 + base/modules/serial/psb_z_mat_mod.F90 | 1 + base/psblas/psb_cspmm.f90 | 21 ++++++++++++++----- base/psblas/psb_dspmm.f90 | 21 ++++++++++++++----- base/psblas/psb_sspmm.f90 | 21 ++++++++++++++----- base/psblas/psb_zspmm.f90 | 21 ++++++++++++++----- base/tools/psb_cspasb.f90 | 30 ++++++++++++++++++++++++++- base/tools/psb_dspasb.f90 | 30 ++++++++++++++++++++++++++- base/tools/psb_sspasb.f90 | 30 ++++++++++++++++++++++++++- base/tools/psb_zspasb.f90 | 30 ++++++++++++++++++++++++++- compile | 0 test/pargen/runs/ppde.inp | 2 +- 14 files changed, 185 insertions(+), 25 deletions(-) create mode 100644 compile diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index fd423de3..2e365858 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_c_mat_mod type :: psb_cspmat_type class(psb_c_base_sparse_mat), allocatable :: a + class(psb_c_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lc_coo_sparse_mat), allocatable :: rmta diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 8f967ce1..49a9545e 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_d_mat_mod type :: psb_dspmat_type class(psb_d_base_sparse_mat), allocatable :: a + class(psb_d_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ld_coo_sparse_mat), allocatable :: rmta diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index 43f1c619..eb444249 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_s_mat_mod type :: psb_sspmat_type class(psb_s_base_sparse_mat), allocatable :: a + class(psb_s_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ls_coo_sparse_mat), allocatable :: rmta diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index c534cad5..e70e48aa 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_z_mat_mod type :: psb_zspmat_type class(psb_z_base_sparse_mat), allocatable :: a + class(psb_z_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lz_coo_sparse_mat), allocatable :: rmta diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index fd8a9c39..555461df 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,cone,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index a006c7e9..be8a493f 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,done,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 43ee0d48..79bfbdd1 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,sone,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index b58ca303..f248db8b 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,zone,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 0c5f14ab..ea7789f2 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_c_coo_sparse_mat) :: acoo + type(psb_c_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 3132f249..89ceef8d 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index cfa316eb..14ad5246 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index aeeef94d..f65be363 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_z_coo_sparse_mat) :: acoo + type(psb_z_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/compile b/compile new file mode 100644 index 00000000..e69de29b diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index e7e5dca2..5f040075 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,7 +2,7 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -040 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +140 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 0100 MAXIT From f09e25524ed30ab5f783ac2d6efbd1684717d317 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 18 Jan 2023 09:17:49 -0500 Subject: [PATCH 002/110] Create ECSR format and use it for A%AND --- base/modules/serial/psb_c_csr_mat_mod.f90 | 126 ++++++++++- base/modules/serial/psb_c_mat_mod.F90 | 3 +- base/modules/serial/psb_d_csr_mat_mod.f90 | 126 ++++++++++- base/modules/serial/psb_d_mat_mod.F90 | 3 +- base/modules/serial/psb_s_csr_mat_mod.f90 | 126 ++++++++++- base/modules/serial/psb_s_mat_mod.F90 | 3 +- base/modules/serial/psb_z_csr_mat_mod.f90 | 126 ++++++++++- base/modules/serial/psb_z_mat_mod.F90 | 3 +- base/serial/impl/psb_c_csr_impl.f90 | 263 ++++++++++++++++++++++ base/serial/impl/psb_d_csr_impl.f90 | 263 ++++++++++++++++++++++ base/serial/impl/psb_s_csr_impl.f90 | 263 ++++++++++++++++++++++ base/serial/impl/psb_z_csr_impl.f90 | 263 ++++++++++++++++++++++ base/tools/psb_cspasb.f90 | 7 +- base/tools/psb_dspasb.f90 | 7 +- base/tools/psb_sspasb.f90 | 7 +- base/tools/psb_zspasb.f90 | 7 +- test/pargen/runs/ppde.inp | 2 +- 17 files changed, 1577 insertions(+), 21 deletions(-) diff --git a/base/modules/serial/psb_c_csr_mat_mod.f90 b/base/modules/serial/psb_c_csr_mat_mod.f90 index 8b076cc2..d09eca2b 100644 --- a/base/modules/serial/psb_c_csr_mat_mod.f90 +++ b/base/modules/serial/psb_c_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_c_csr_mat_mod end subroutine psb_c_csr_scals end interface - !> \namespace psb_base_mod \class psb_lc_csr_sparse_mat + + type, extends(psb_c_csr_sparse_mat) :: psb_c_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => c_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_c_ecsr_csmm + procedure, pass(a) :: csmv => psb_c_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_c_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_c_ecsr_cmp_nerwp + procedure, pass(a) :: free => c_ecsr_free + procedure, pass(a) :: mold => psb_c_ecsr_mold + + end type psb_c_ecsr_sparse_mat + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_csmv + interface + subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_c_ecsr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ecsr_csmv + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo + interface + subroutine psb_c_ecsr_cmp_nerwp(a,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ecsr_cmp_nerwp + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo + interface + subroutine psb_c_cp_ecsr_from_coo(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ecsr_from_coo + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_fmt + interface + subroutine psb_c_cp_ecsr_from_fmt(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ecsr_from_fmt + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_mv_from_coo + interface + subroutine psb_c_mv_ecsr_from_coo(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ecsr_from_coo + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_mv_from_fmt + interface + subroutine psb_c_mv_ecsr_from_fmt(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ecsr_from_fmt + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_c_ecsr_mold(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_lc_csr_sparse_mat !! \extends psb_lc_base_mat_mod::psb_lc_base_sparse_mat !! !! psb_lc_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function c_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function c_ecsr_get_fmt + + subroutine c_ecsr_free(a) + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_c_csr_sparse_mat%free() + + return + end subroutine c_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index 2e365858..aa891381 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -79,7 +79,8 @@ module psb_c_mat_mod use psb_c_base_mat_mod - use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat + use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat,& + & psb_c_ecsr_sparse_mat use psb_c_csc_mat_mod, only : psb_c_csc_sparse_mat, psb_lc_csc_sparse_mat type :: psb_cspmat_type diff --git a/base/modules/serial/psb_d_csr_mat_mod.f90 b/base/modules/serial/psb_d_csr_mat_mod.f90 index d0aa622b..12d71755 100644 --- a/base/modules/serial/psb_d_csr_mat_mod.f90 +++ b/base/modules/serial/psb_d_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_d_csr_mat_mod end subroutine psb_d_csr_scals end interface - !> \namespace psb_base_mod \class psb_ld_csr_sparse_mat + + type, extends(psb_d_csr_sparse_mat) :: psb_d_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => d_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_d_ecsr_csmm + procedure, pass(a) :: csmv => psb_d_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_d_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_d_ecsr_cmp_nerwp + procedure, pass(a) :: free => d_ecsr_free + procedure, pass(a) :: mold => psb_d_ecsr_mold + + end type psb_d_ecsr_sparse_mat + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_csmv + interface + subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_d_ecsr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ecsr_csmv + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo + interface + subroutine psb_d_ecsr_cmp_nerwp(a,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ecsr_cmp_nerwp + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo + interface + subroutine psb_d_cp_ecsr_from_coo(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ecsr_from_coo + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_fmt + interface + subroutine psb_d_cp_ecsr_from_fmt(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ecsr_from_fmt + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_mv_from_coo + interface + subroutine psb_d_mv_ecsr_from_coo(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ecsr_from_coo + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_mv_from_fmt + interface + subroutine psb_d_mv_ecsr_from_fmt(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ecsr_from_fmt + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_d_ecsr_mold(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_ld_csr_sparse_mat !! \extends psb_ld_base_mat_mod::psb_ld_base_sparse_mat !! !! psb_ld_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function d_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function d_ecsr_get_fmt + + subroutine d_ecsr_free(a) + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_d_csr_sparse_mat%free() + + return + end subroutine d_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 49a9545e..c647e76b 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -79,7 +79,8 @@ module psb_d_mat_mod use psb_d_base_mat_mod - use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat + use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat,& + & psb_d_ecsr_sparse_mat use psb_d_csc_mat_mod, only : psb_d_csc_sparse_mat, psb_ld_csc_sparse_mat type :: psb_dspmat_type diff --git a/base/modules/serial/psb_s_csr_mat_mod.f90 b/base/modules/serial/psb_s_csr_mat_mod.f90 index 6b4c51c7..884ede38 100644 --- a/base/modules/serial/psb_s_csr_mat_mod.f90 +++ b/base/modules/serial/psb_s_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_s_csr_mat_mod end subroutine psb_s_csr_scals end interface - !> \namespace psb_base_mod \class psb_ls_csr_sparse_mat + + type, extends(psb_s_csr_sparse_mat) :: psb_s_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => s_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_s_ecsr_csmm + procedure, pass(a) :: csmv => psb_s_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_s_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_s_ecsr_cmp_nerwp + procedure, pass(a) :: free => s_ecsr_free + procedure, pass(a) :: mold => psb_s_ecsr_mold + + end type psb_s_ecsr_sparse_mat + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_csmv + interface + subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_s_ecsr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ecsr_csmv + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo + interface + subroutine psb_s_ecsr_cmp_nerwp(a,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ecsr_cmp_nerwp + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo + interface + subroutine psb_s_cp_ecsr_from_coo(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ecsr_from_coo + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_fmt + interface + subroutine psb_s_cp_ecsr_from_fmt(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ecsr_from_fmt + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_mv_from_coo + interface + subroutine psb_s_mv_ecsr_from_coo(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ecsr_from_coo + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_mv_from_fmt + interface + subroutine psb_s_mv_ecsr_from_fmt(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ecsr_from_fmt + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_s_ecsr_mold(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_ls_csr_sparse_mat !! \extends psb_ls_base_mat_mod::psb_ls_base_sparse_mat !! !! psb_ls_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function s_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function s_ecsr_get_fmt + + subroutine s_ecsr_free(a) + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_s_csr_sparse_mat%free() + + return + end subroutine s_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index eb444249..3e6b286a 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -79,7 +79,8 @@ module psb_s_mat_mod use psb_s_base_mat_mod - use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat + use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat,& + & psb_s_ecsr_sparse_mat use psb_s_csc_mat_mod, only : psb_s_csc_sparse_mat, psb_ls_csc_sparse_mat type :: psb_sspmat_type diff --git a/base/modules/serial/psb_z_csr_mat_mod.f90 b/base/modules/serial/psb_z_csr_mat_mod.f90 index 4ec8dd00..c328fead 100644 --- a/base/modules/serial/psb_z_csr_mat_mod.f90 +++ b/base/modules/serial/psb_z_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_z_csr_mat_mod end subroutine psb_z_csr_scals end interface - !> \namespace psb_base_mod \class psb_lz_csr_sparse_mat + + type, extends(psb_z_csr_sparse_mat) :: psb_z_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => z_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_z_ecsr_csmm + procedure, pass(a) :: csmv => psb_z_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_z_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_z_ecsr_cmp_nerwp + procedure, pass(a) :: free => z_ecsr_free + procedure, pass(a) :: mold => psb_z_ecsr_mold + + end type psb_z_ecsr_sparse_mat + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_csmv + interface + subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_z_ecsr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ecsr_csmv + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo + interface + subroutine psb_z_ecsr_cmp_nerwp(a,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ecsr_cmp_nerwp + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo + interface + subroutine psb_z_cp_ecsr_from_coo(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ecsr_from_coo + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_fmt + interface + subroutine psb_z_cp_ecsr_from_fmt(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ecsr_from_fmt + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_mv_from_coo + interface + subroutine psb_z_mv_ecsr_from_coo(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ecsr_from_coo + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_mv_from_fmt + interface + subroutine psb_z_mv_ecsr_from_fmt(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ecsr_from_fmt + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_z_ecsr_mold(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_lz_csr_sparse_mat !! \extends psb_lz_base_mat_mod::psb_lz_base_sparse_mat !! !! psb_lz_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function z_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function z_ecsr_get_fmt + + subroutine z_ecsr_free(a) + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_z_csr_sparse_mat%free() + + return + end subroutine z_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index e70e48aa..148e9ab9 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -79,7 +79,8 @@ module psb_z_mat_mod use psb_z_base_mat_mod - use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat + use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat,& + & psb_z_ecsr_sparse_mat use psb_z_csc_mat_mod, only : psb_z_csc_sparse_mat, psb_lz_csc_sparse_mat type :: psb_zspmat_type diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 55a91648..1fed09ba 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -3550,6 +3550,269 @@ contains end subroutine psb_ccsrspspmm +subroutine psb_c_ecsr_mold(a,b,info) + use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold + use psb_error_mod + implicit none + class(psb_c_ecsr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ecsr_mold + +subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_csmv + implicit none + class(psb_c_ecsr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='c_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ecsr_cmp_nerwp + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) +end subroutine psb_c_ecsr_cmp_nerwp + +subroutine psb_c_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_coo + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_cp_ecsr_from_coo + +subroutine psb_c_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_coo + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_mv_ecsr_from_coo + +subroutine psb_c_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_fmt + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_mv_ecsr_from_fmt + +subroutine psb_c_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_c_base_mat_mod + use psb_realloc_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_fmt + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_cp_ecsr_from_fmt + ! ! diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index 2c59c1a5..1bcc82a9 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -3550,6 +3550,269 @@ contains end subroutine psb_dcsrspspmm +subroutine psb_d_ecsr_mold(a,b,info) + use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_mold + use psb_error_mod + implicit none + class(psb_d_ecsr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ecsr_mold + +subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_csmv + implicit none + class(psb_d_ecsr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='d_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ecsr_cmp_nerwp + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) +end subroutine psb_d_ecsr_cmp_nerwp + +subroutine psb_d_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_coo + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_cp_ecsr_from_coo + +subroutine psb_d_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_coo + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_mv_ecsr_from_coo + +subroutine psb_d_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_fmt + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_mv_ecsr_from_fmt + +subroutine psb_d_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_d_base_mat_mod + use psb_realloc_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_fmt + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_cp_ecsr_from_fmt + ! ! diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index 75358dbc..9670aeb9 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -3550,6 +3550,269 @@ contains end subroutine psb_scsrspspmm +subroutine psb_s_ecsr_mold(a,b,info) + use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_mold + use psb_error_mod + implicit none + class(psb_s_ecsr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ecsr_mold + +subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_csmv + implicit none + class(psb_s_ecsr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='s_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ecsr_cmp_nerwp + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) +end subroutine psb_s_ecsr_cmp_nerwp + +subroutine psb_s_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_coo + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_cp_ecsr_from_coo + +subroutine psb_s_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_coo + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_mv_ecsr_from_coo + +subroutine psb_s_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_fmt + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_mv_ecsr_from_fmt + +subroutine psb_s_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_s_base_mat_mod + use psb_realloc_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_fmt + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_cp_ecsr_from_fmt + ! ! diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index 4f2693c0..e9847849 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -3550,6 +3550,269 @@ contains end subroutine psb_zcsrspspmm +subroutine psb_z_ecsr_mold(a,b,info) + use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_mold + use psb_error_mod + implicit none + class(psb_z_ecsr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ecsr_mold + +subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_csmv + implicit none + class(psb_z_ecsr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='z_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ecsr_cmp_nerwp + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) +end subroutine psb_z_ecsr_cmp_nerwp + +subroutine psb_z_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_coo + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_cp_ecsr_from_coo + +subroutine psb_z_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_coo + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_mv_ecsr_from_coo + +subroutine psb_z_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_fmt + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_mv_ecsr_from_fmt + +subroutine psb_z_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_z_base_mat_mod + use psb_realloc_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_fmt + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_cp_ecsr_from_fmt + ! ! diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index ea7789f2..b4c957b0 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -175,13 +175,14 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) block character(len=1024) :: fname type(psb_c_coo_sparse_mat) :: acoo - type(psb_c_csr_sparse_mat), allocatable :: aclip, andclip + type(psb_c_csr_sparse_mat), allocatable :: aclip + type(psb_c_ecsr_sparse_mat), allocatable :: andclip allocate(aclip,andclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - call aclip%mv_from_coo(acoo,info) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) call andclip%mv_from_coo(acoo,info) - call move_alloc(aclip,a%ad) call move_alloc(andclip,a%and) if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 89ceef8d..5ebc47e8 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -175,13 +175,14 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) block character(len=1024) :: fname type(psb_d_coo_sparse_mat) :: acoo - type(psb_d_csr_sparse_mat), allocatable :: aclip, andclip + type(psb_d_csr_sparse_mat), allocatable :: aclip + type(psb_d_ecsr_sparse_mat), allocatable :: andclip allocate(aclip,andclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - call aclip%mv_from_coo(acoo,info) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) call andclip%mv_from_coo(acoo,info) - call move_alloc(aclip,a%ad) call move_alloc(andclip,a%and) if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index 14ad5246..5423c2a7 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -175,13 +175,14 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) block character(len=1024) :: fname type(psb_s_coo_sparse_mat) :: acoo - type(psb_s_csr_sparse_mat), allocatable :: aclip, andclip + type(psb_s_csr_sparse_mat), allocatable :: aclip + type(psb_s_ecsr_sparse_mat), allocatable :: andclip allocate(aclip,andclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - call aclip%mv_from_coo(acoo,info) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) call andclip%mv_from_coo(acoo,info) - call move_alloc(aclip,a%ad) call move_alloc(andclip,a%and) if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index f65be363..66fc8cd7 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -175,13 +175,14 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) block character(len=1024) :: fname type(psb_z_coo_sparse_mat) :: acoo - type(psb_z_csr_sparse_mat), allocatable :: aclip, andclip + type(psb_z_csr_sparse_mat), allocatable :: aclip + type(psb_z_ecsr_sparse_mat), allocatable :: andclip allocate(aclip,andclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - call aclip%mv_from_coo(acoo,info) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) call andclip%mv_from_coo(acoo,info) - call move_alloc(aclip,a%ad) call move_alloc(andclip,a%and) if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index 5f040075..57fda01a 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,7 +2,7 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -140 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +100 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 0100 MAXIT From 86b8a261efd23d244a034a2b1826cdc3ecae2c43 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 19 Jan 2023 08:36:22 -0500 Subject: [PATCH 003/110] Fixed conversion bug, changed SPASB interface --- base/modules/tools/psb_c_tools_mod.F90 | 3 ++- base/modules/tools/psb_d_tools_mod.F90 | 3 ++- base/modules/tools/psb_s_tools_mod.F90 | 3 ++- base/modules/tools/psb_z_tools_mod.F90 | 3 ++- base/psblas/psb_cspmm.f90 | 6 +++--- base/psblas/psb_dspmm.f90 | 6 +++--- base/psblas/psb_sspmm.f90 | 6 +++--- base/psblas/psb_zspmm.f90 | 6 +++--- base/serial/impl/psb_c_csr_impl.f90 | 9 +++------ base/serial/impl/psb_d_csr_impl.f90 | 9 +++------ base/serial/impl/psb_s_csr_impl.f90 | 9 +++------ base/serial/impl/psb_z_csr_impl.f90 | 9 +++------ base/tools/psb_cspasb.f90 | 28 ++++++++++++++++++++------ base/tools/psb_dspasb.f90 | 28 ++++++++++++++++++++------ base/tools/psb_sspasb.f90 | 28 ++++++++++++++++++++------ base/tools/psb_zspasb.f90 | 28 ++++++++++++++++++++------ test/pargen/psb_d_pde3d.F90 | 4 ++-- test/pargen/runs/ppde.inp | 6 +++--- 18 files changed, 125 insertions(+), 69 deletions(-) diff --git a/base/modules/tools/psb_c_tools_mod.F90 b/base/modules/tools/psb_c_tools_mod.F90 index 2de8f906..0ed2d82c 100644 --- a/base/modules/tools/psb_c_tools_mod.F90 +++ b/base/modules/tools/psb_c_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_c_tools_mod end interface interface psb_spasb - subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_cspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_c_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_c_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_cspasb end interface diff --git a/base/modules/tools/psb_d_tools_mod.F90 b/base/modules/tools/psb_d_tools_mod.F90 index 30e45d53..26f83201 100644 --- a/base/modules/tools/psb_d_tools_mod.F90 +++ b/base/modules/tools/psb_d_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_d_tools_mod end interface interface psb_spasb - subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_dspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_d_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_d_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_dspasb end interface diff --git a/base/modules/tools/psb_s_tools_mod.F90 b/base/modules/tools/psb_s_tools_mod.F90 index 5d2f8d00..0f70a31a 100644 --- a/base/modules/tools/psb_s_tools_mod.F90 +++ b/base/modules/tools/psb_s_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_s_tools_mod end interface interface psb_spasb - subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_sspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_s_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_s_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_sspasb end interface diff --git a/base/modules/tools/psb_z_tools_mod.F90 b/base/modules/tools/psb_z_tools_mod.F90 index 9d6bd77b..1f24e05a 100644 --- a/base/modules/tools/psb_z_tools_mod.F90 +++ b/base/modules/tools/psb_z_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_z_tools_mod end interface interface psb_spasb - subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_zspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_z_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_z_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_zspasb end interface diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index 555461df..84d8a7d8 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -179,11 +179,11 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (.true.) then - call psi_swapdata(psb_swap_send_,& + if (allocated(a%ad)) then + if (doswap_) call psi_swapdata(psb_swap_send_,& & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%ad%spmm(alpha,x%v,beta,y%v,info) - call psi_swapdata(psb_swap_recv_,& + if (doswap_) call psi_swapdata(psb_swap_recv_,& & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%and%spmm(alpha,x%v,cone,y%v,info) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index be8a493f..d5897f82 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -179,11 +179,11 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (.true.) then - call psi_swapdata(psb_swap_send_,& + if (allocated(a%ad)) then + if (doswap_) call psi_swapdata(psb_swap_send_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%ad%spmm(alpha,x%v,beta,y%v,info) - call psi_swapdata(psb_swap_recv_,& + if (doswap_) call psi_swapdata(psb_swap_recv_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%and%spmm(alpha,x%v,done,y%v,info) diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 79bfbdd1..7c1e0ab3 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -179,11 +179,11 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (.true.) then - call psi_swapdata(psb_swap_send_,& + if (allocated(a%ad)) then + if (doswap_) call psi_swapdata(psb_swap_send_,& & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%ad%spmm(alpha,x%v,beta,y%v,info) - call psi_swapdata(psb_swap_recv_,& + if (doswap_) call psi_swapdata(psb_swap_recv_,& & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%and%spmm(alpha,x%v,sone,y%v,info) diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index f248db8b..4dc73f83 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -179,11 +179,11 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (.true.) then - call psi_swapdata(psb_swap_send_,& + if (allocated(a%ad)) then + if (doswap_) call psi_swapdata(psb_swap_send_,& & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%ad%spmm(alpha,x%v,beta,y%v,info) - call psi_swapdata(psb_swap_recv_,& + if (doswap_) call psi_swapdata(psb_swap_recv_,& & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%and%spmm(alpha,x%v,zone,y%v,info) diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 1fed09ba..4744d947 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -3643,9 +3643,8 @@ subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if - if (((beta == cone).and..not.(tra.or.ctra))& - & .or.(a%is_triangle()).or.(a%is_unit())) then - + if ((beta == cone).and.& + & .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then call psb_c_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,& & a%nnerws,a%nerwp,x,y) else @@ -3672,9 +3671,6 @@ contains if (alpha == czero) return - - - if (alpha == cone) then !$omp parallel do private(ir,i,j,acc) do ir=1,nnerws @@ -3740,6 +3736,7 @@ subroutine psb_c_ecsr_cmp_nerwp(a,info) end if end do call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws end subroutine psb_c_ecsr_cmp_nerwp subroutine psb_c_cp_ecsr_from_coo(a,b,info) diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index 1bcc82a9..6d2b58ad 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -3643,9 +3643,8 @@ subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if - if (((beta == done).and..not.(tra.or.ctra))& - & .or.(a%is_triangle()).or.(a%is_unit())) then - + if ((beta == done).and.& + & .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then call psb_d_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,& & a%nnerws,a%nerwp,x,y) else @@ -3672,9 +3671,6 @@ contains if (alpha == dzero) return - - - if (alpha == done) then !$omp parallel do private(ir,i,j,acc) do ir=1,nnerws @@ -3740,6 +3736,7 @@ subroutine psb_d_ecsr_cmp_nerwp(a,info) end if end do call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws end subroutine psb_d_ecsr_cmp_nerwp subroutine psb_d_cp_ecsr_from_coo(a,b,info) diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index 9670aeb9..87cfff68 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -3643,9 +3643,8 @@ subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if - if (((beta == sone).and..not.(tra.or.ctra))& - & .or.(a%is_triangle()).or.(a%is_unit())) then - + if ((beta == sone).and.& + & .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then call psb_s_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,& & a%nnerws,a%nerwp,x,y) else @@ -3672,9 +3671,6 @@ contains if (alpha == szero) return - - - if (alpha == sone) then !$omp parallel do private(ir,i,j,acc) do ir=1,nnerws @@ -3740,6 +3736,7 @@ subroutine psb_s_ecsr_cmp_nerwp(a,info) end if end do call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws end subroutine psb_s_ecsr_cmp_nerwp subroutine psb_s_cp_ecsr_from_coo(a,b,info) diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index e9847849..a4a2dd5a 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -3643,9 +3643,8 @@ subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if - if (((beta == zone).and..not.(tra.or.ctra))& - & .or.(a%is_triangle()).or.(a%is_unit())) then - + if ((beta == zone).and.& + & .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then call psb_z_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,& & a%nnerws,a%nerwp,x,y) else @@ -3672,9 +3671,6 @@ contains if (alpha == zzero) return - - - if (alpha == zone) then !$omp parallel do private(ir,i,j,acc) do ir=1,nnerws @@ -3740,6 +3736,7 @@ subroutine psb_z_ecsr_cmp_nerwp(a,info) end if end do call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws end subroutine psb_z_ecsr_cmp_nerwp subroutine psb_z_cp_ecsr_from_coo(a,b,info) diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index b4c957b0..46258139 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_cspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_c_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,19 +177,26 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) end if - if (.true.) then + if (bld_and_) then block character(len=1024) :: fname type(psb_c_coo_sparse_mat) :: acoo type(psb_c_csr_sparse_mat), allocatable :: aclip type(psb_c_ecsr_sparse_mat), allocatable :: andclip - allocate(aclip,andclip) + logical, parameter :: use_ecsr=.false. + allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' open(25,file=fname) @@ -200,6 +213,9 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col end if end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 5ebc47e8..6beb0e6f 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_dspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_d_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,19 +177,26 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) end if - if (.true.) then + if (bld_and_) then block character(len=1024) :: fname type(psb_d_coo_sparse_mat) :: acoo type(psb_d_csr_sparse_mat), allocatable :: aclip type(psb_d_ecsr_sparse_mat), allocatable :: andclip - allocate(aclip,andclip) + logical, parameter :: use_ecsr=.true. + allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' open(25,file=fname) @@ -200,6 +213,9 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col end if end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index 5423c2a7..0edae30e 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_sspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_s_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,19 +177,26 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) end if - if (.true.) then + if (bld_and_) then block character(len=1024) :: fname type(psb_s_coo_sparse_mat) :: acoo type(psb_s_csr_sparse_mat), allocatable :: aclip type(psb_s_ecsr_sparse_mat), allocatable :: andclip - allocate(aclip,andclip) + logical, parameter :: use_ecsr=.false. + allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' open(25,file=fname) @@ -200,6 +213,9 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col end if end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index 66fc8cd7..cd77de15 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_zspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_z_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,19 +177,26 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) end if - if (.true.) then + if (bld_and_) then block character(len=1024) :: fname type(psb_z_coo_sparse_mat) :: acoo type(psb_z_csr_sparse_mat), allocatable :: aclip type(psb_z_ecsr_sparse_mat), allocatable :: andclip - allocate(aclip,andclip) + logical, parameter :: use_ecsr=.false. + allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' open(25,file=fname) @@ -200,6 +213,9 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col end if end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index d4eeccf2..cd503d29 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -680,9 +680,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=.true.) else - call psb_spasb(a,desc_a,info,afmt=afmt) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.true.) end if end if call psb_barrier(ctxt) diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index 57fda01a..c70a973f 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,11 +2,11 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -100 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +200 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 -0100 MAXIT -05 ITRACE +0300 MAXIT +10 ITRACE 002 IRST restart for RGMRES and BiCGSTABL ILU Block Solver ILU,ILUT,INVK,AINVT,AORTH NONE If ILU : MILU or NONE othewise ignored From 1d5faa388dd5fdbfc39a93f318c2d2eaa9eb947b Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 27 Sep 2023 16:23:17 +0200 Subject: [PATCH 004/110] Merge psblas-ext into psblas, step 1: ext storage formats. --- Makefile | 10 +- ext/Makefile | 84 +++ ext/impl/Makefile | 412 ++++++++++++++ ext/impl/psb_c_cp_dia_from_coo.f90 | 70 +++ ext/impl/psb_c_cp_dia_to_coo.f90 | 65 +++ ext/impl/psb_c_cp_ell_from_coo.f90 | 71 +++ ext/impl/psb_c_cp_ell_from_fmt.f90 | 65 +++ ext/impl/psb_c_cp_ell_to_coo.f90 | 69 +++ ext/impl/psb_c_cp_ell_to_fmt.f90 | 67 +++ ext/impl/psb_c_cp_hdia_from_coo.f90 | 222 ++++++++ ext/impl/psb_c_cp_hdia_to_coo.f90 | 84 +++ ext/impl/psb_c_cp_hll_from_coo.f90 | 74 +++ ext/impl/psb_c_cp_hll_from_fmt.f90 | 70 +++ ext/impl/psb_c_cp_hll_to_coo.f90 | 104 ++++ ext/impl/psb_c_cp_hll_to_fmt.f90 | 68 +++ ext/impl/psb_c_dia_aclsum.f90 | 87 +++ ext/impl/psb_c_dia_allocate_mnnz.f90 | 88 +++ ext/impl/psb_c_dia_arwsum.f90 | 87 +++ ext/impl/psb_c_dia_colsum.f90 | 87 +++ ext/impl/psb_c_dia_csgetptn.f90 | 188 ++++++ ext/impl/psb_c_dia_csgetrow.f90 | 199 +++++++ ext/impl/psb_c_dia_csmm.f90 | 134 +++++ ext/impl/psb_c_dia_csmv.f90 | 135 +++++ ext/impl/psb_c_dia_get_diag.f90 | 75 +++ ext/impl/psb_c_dia_maxval.f90 | 54 ++ ext/impl/psb_c_dia_mold.f90 | 61 ++ ext/impl/psb_c_dia_print.f90 | 148 +++++ ext/impl/psb_c_dia_reallocate_nz.f90 | 56 ++ ext/impl/psb_c_dia_reinit.f90 | 78 +++ ext/impl/psb_c_dia_rowsum.f90 | 87 +++ ext/impl/psb_c_dia_scal.f90 | 108 ++++ ext/impl/psb_c_dia_scals.f90 | 63 +++ ext/impl/psb_c_dns_mat_impl.f90 | 724 ++++++++++++++++++++++++ ext/impl/psb_c_ell_aclsum.f90 | 82 +++ ext/impl/psb_c_ell_allocate_mnnz.f90 | 91 +++ ext/impl/psb_c_ell_arwsum.f90 | 78 +++ ext/impl/psb_c_ell_colsum.f90 | 80 +++ ext/impl/psb_c_ell_csgetblk.f90 | 83 +++ ext/impl/psb_c_ell_csgetptn.f90 | 189 +++++++ ext/impl/psb_c_ell_csgetrow.f90 | 205 +++++++ ext/impl/psb_c_ell_csmm.f90 | 377 ++++++++++++ ext/impl/psb_c_ell_csmv.f90 | 433 ++++++++++++++ ext/impl/psb_c_ell_csnm1.f90 | 73 +++ ext/impl/psb_c_ell_csnmi.f90 | 58 ++ ext/impl/psb_c_ell_csput.f90 | 208 +++++++ ext/impl/psb_c_ell_cssm.f90 | 375 ++++++++++++ ext/impl/psb_c_ell_cssv.f90 | 372 ++++++++++++ ext/impl/psb_c_ell_get_diag.f90 | 77 +++ ext/impl/psb_c_ell_maxval.f90 | 60 ++ ext/impl/psb_c_ell_mold.f90 | 63 +++ ext/impl/psb_c_ell_print.f90 | 99 ++++ ext/impl/psb_c_ell_reallocate_nz.f90 | 66 +++ ext/impl/psb_c_ell_reinit.f90 | 77 +++ ext/impl/psb_c_ell_rowsum.f90 | 77 +++ ext/impl/psb_c_ell_scal.f90 | 99 ++++ ext/impl/psb_c_ell_scals.f90 | 63 +++ ext/impl/psb_c_ell_trim.f90 | 60 ++ ext/impl/psb_c_hdia_allocate_mnnz.f90 | 75 +++ ext/impl/psb_c_hdia_csmv.f90 | 162 ++++++ ext/impl/psb_c_hdia_mold.f90 | 63 +++ ext/impl/psb_c_hdia_print.f90 | 121 ++++ ext/impl/psb_c_hll_aclsum.f90 | 109 ++++ ext/impl/psb_c_hll_allocate_mnnz.f90 | 93 +++ ext/impl/psb_c_hll_arwsum.f90 | 108 ++++ ext/impl/psb_c_hll_colsum.f90 | 109 ++++ ext/impl/psb_c_hll_csgetblk.f90 | 83 +++ ext/impl/psb_c_hll_csgetptn.f90 | 209 +++++++ ext/impl/psb_c_hll_csgetrow.f90 | 221 ++++++++ ext/impl/psb_c_hll_csmm.f90 | 235 ++++++++ ext/impl/psb_c_hll_csmv.f90 | 563 ++++++++++++++++++ ext/impl/psb_c_hll_csnm1.f90 | 111 ++++ ext/impl/psb_c_hll_csnmi.f90 | 104 ++++ ext/impl/psb_c_hll_csput.f90 | 233 ++++++++ ext/impl/psb_c_hll_cssm.f90 | 506 +++++++++++++++++ ext/impl/psb_c_hll_cssv.f90 | 498 ++++++++++++++++ ext/impl/psb_c_hll_get_diag.f90 | 110 ++++ ext/impl/psb_c_hll_maxval.f90 | 45 ++ ext/impl/psb_c_hll_mold.f90 | 65 +++ ext/impl/psb_c_hll_print.f90 | 134 +++++ ext/impl/psb_c_hll_reallocate_nz.f90 | 64 +++ ext/impl/psb_c_hll_reinit.f90 | 77 +++ ext/impl/psb_c_hll_rowsum.f90 | 110 ++++ ext/impl/psb_c_hll_scal.f90 | 135 +++++ ext/impl/psb_c_hll_scals.f90 | 63 +++ ext/impl/psb_c_mv_dia_from_coo.f90 | 62 ++ ext/impl/psb_c_mv_dia_to_coo.f90 | 55 ++ ext/impl/psb_c_mv_ell_from_coo.f90 | 56 ++ ext/impl/psb_c_mv_ell_from_fmt.f90 | 67 +++ ext/impl/psb_c_mv_ell_to_coo.f90 | 89 +++ ext/impl/psb_c_mv_ell_to_fmt.f90 | 67 +++ ext/impl/psb_c_mv_hdia_from_coo.f90 | 60 ++ ext/impl/psb_c_mv_hdia_to_coo.f90 | 55 ++ ext/impl/psb_c_mv_hll_from_coo.f90 | 58 ++ ext/impl/psb_c_mv_hll_from_fmt.f90 | 70 +++ ext/impl/psb_c_mv_hll_to_coo.f90 | 56 ++ ext/impl/psb_c_mv_hll_to_fmt.f90 | 69 +++ ext/impl/psb_d_cp_dia_from_coo.f90 | 70 +++ ext/impl/psb_d_cp_dia_to_coo.f90 | 65 +++ ext/impl/psb_d_cp_ell_from_coo.f90 | 71 +++ ext/impl/psb_d_cp_ell_from_fmt.f90 | 65 +++ ext/impl/psb_d_cp_ell_to_coo.f90 | 69 +++ ext/impl/psb_d_cp_ell_to_fmt.f90 | 67 +++ ext/impl/psb_d_cp_hdia_from_coo.f90 | 222 ++++++++ ext/impl/psb_d_cp_hdia_to_coo.f90 | 84 +++ ext/impl/psb_d_cp_hll_from_coo.f90 | 74 +++ ext/impl/psb_d_cp_hll_from_fmt.f90 | 70 +++ ext/impl/psb_d_cp_hll_to_coo.f90 | 104 ++++ ext/impl/psb_d_cp_hll_to_fmt.f90 | 68 +++ ext/impl/psb_d_dia_aclsum.f90 | 87 +++ ext/impl/psb_d_dia_allocate_mnnz.f90 | 88 +++ ext/impl/psb_d_dia_arwsum.f90 | 87 +++ ext/impl/psb_d_dia_colsum.f90 | 87 +++ ext/impl/psb_d_dia_csgetptn.f90 | 188 ++++++ ext/impl/psb_d_dia_csgetrow.f90 | 199 +++++++ ext/impl/psb_d_dia_csmm.f90 | 134 +++++ ext/impl/psb_d_dia_csmv.f90 | 135 +++++ ext/impl/psb_d_dia_get_diag.f90 | 75 +++ ext/impl/psb_d_dia_maxval.f90 | 54 ++ ext/impl/psb_d_dia_mold.f90 | 61 ++ ext/impl/psb_d_dia_print.f90 | 148 +++++ ext/impl/psb_d_dia_reallocate_nz.f90 | 56 ++ ext/impl/psb_d_dia_reinit.f90 | 78 +++ ext/impl/psb_d_dia_rowsum.f90 | 87 +++ ext/impl/psb_d_dia_scal.f90 | 108 ++++ ext/impl/psb_d_dia_scals.f90 | 63 +++ ext/impl/psb_d_dns_mat_impl.f90 | 724 ++++++++++++++++++++++++ ext/impl/psb_d_ell_aclsum.f90 | 82 +++ ext/impl/psb_d_ell_allocate_mnnz.f90 | 91 +++ ext/impl/psb_d_ell_arwsum.f90 | 78 +++ ext/impl/psb_d_ell_colsum.f90 | 80 +++ ext/impl/psb_d_ell_csgetblk.f90 | 83 +++ ext/impl/psb_d_ell_csgetptn.f90 | 189 +++++++ ext/impl/psb_d_ell_csgetrow.f90 | 205 +++++++ ext/impl/psb_d_ell_csmm.f90 | 377 ++++++++++++ ext/impl/psb_d_ell_csmv.f90 | 433 ++++++++++++++ ext/impl/psb_d_ell_csnm1.f90 | 73 +++ ext/impl/psb_d_ell_csnmi.f90 | 58 ++ ext/impl/psb_d_ell_csput.f90 | 208 +++++++ ext/impl/psb_d_ell_cssm.f90 | 375 ++++++++++++ ext/impl/psb_d_ell_cssv.f90 | 372 ++++++++++++ ext/impl/psb_d_ell_get_diag.f90 | 77 +++ ext/impl/psb_d_ell_maxval.f90 | 60 ++ ext/impl/psb_d_ell_mold.f90 | 63 +++ ext/impl/psb_d_ell_print.f90 | 99 ++++ ext/impl/psb_d_ell_reallocate_nz.f90 | 66 +++ ext/impl/psb_d_ell_reinit.f90 | 77 +++ ext/impl/psb_d_ell_rowsum.f90 | 77 +++ ext/impl/psb_d_ell_scal.f90 | 99 ++++ ext/impl/psb_d_ell_scals.f90 | 63 +++ ext/impl/psb_d_ell_trim.f90 | 60 ++ ext/impl/psb_d_hdia_allocate_mnnz.f90 | 75 +++ ext/impl/psb_d_hdia_csmv.f90 | 162 ++++++ ext/impl/psb_d_hdia_mold.f90 | 63 +++ ext/impl/psb_d_hdia_print.f90 | 121 ++++ ext/impl/psb_d_hll_aclsum.f90 | 109 ++++ ext/impl/psb_d_hll_allocate_mnnz.f90 | 93 +++ ext/impl/psb_d_hll_arwsum.f90 | 108 ++++ ext/impl/psb_d_hll_colsum.f90 | 109 ++++ ext/impl/psb_d_hll_csgetblk.f90 | 83 +++ ext/impl/psb_d_hll_csgetptn.f90 | 209 +++++++ ext/impl/psb_d_hll_csgetrow.f90 | 221 ++++++++ ext/impl/psb_d_hll_csmm.f90 | 235 ++++++++ ext/impl/psb_d_hll_csmv.f90 | 563 ++++++++++++++++++ ext/impl/psb_d_hll_csnm1.f90 | 111 ++++ ext/impl/psb_d_hll_csnmi.f90 | 104 ++++ ext/impl/psb_d_hll_csput.f90 | 233 ++++++++ ext/impl/psb_d_hll_cssm.f90 | 506 +++++++++++++++++ ext/impl/psb_d_hll_cssv.f90 | 498 ++++++++++++++++ ext/impl/psb_d_hll_get_diag.f90 | 110 ++++ ext/impl/psb_d_hll_maxval.f90 | 45 ++ ext/impl/psb_d_hll_mold.f90 | 65 +++ ext/impl/psb_d_hll_print.f90 | 134 +++++ ext/impl/psb_d_hll_reallocate_nz.f90 | 64 +++ ext/impl/psb_d_hll_reinit.f90 | 77 +++ ext/impl/psb_d_hll_rowsum.f90 | 110 ++++ ext/impl/psb_d_hll_scal.f90 | 135 +++++ ext/impl/psb_d_hll_scals.f90 | 63 +++ ext/impl/psb_d_mv_dia_from_coo.f90 | 62 ++ ext/impl/psb_d_mv_dia_to_coo.f90 | 55 ++ ext/impl/psb_d_mv_ell_from_coo.f90 | 56 ++ ext/impl/psb_d_mv_ell_from_fmt.f90 | 67 +++ ext/impl/psb_d_mv_ell_to_coo.f90 | 89 +++ ext/impl/psb_d_mv_ell_to_fmt.f90 | 67 +++ ext/impl/psb_d_mv_hdia_from_coo.f90 | 60 ++ ext/impl/psb_d_mv_hdia_to_coo.f90 | 55 ++ ext/impl/psb_d_mv_hll_from_coo.f90 | 58 ++ ext/impl/psb_d_mv_hll_from_fmt.f90 | 70 +++ ext/impl/psb_d_mv_hll_to_coo.f90 | 56 ++ ext/impl/psb_d_mv_hll_to_fmt.f90 | 69 +++ ext/impl/psb_s_cp_dia_from_coo.f90 | 70 +++ ext/impl/psb_s_cp_dia_to_coo.f90 | 65 +++ ext/impl/psb_s_cp_ell_from_coo.f90 | 71 +++ ext/impl/psb_s_cp_ell_from_fmt.f90 | 65 +++ ext/impl/psb_s_cp_ell_to_coo.f90 | 69 +++ ext/impl/psb_s_cp_ell_to_fmt.f90 | 67 +++ ext/impl/psb_s_cp_hdia_from_coo.f90 | 222 ++++++++ ext/impl/psb_s_cp_hdia_to_coo.f90 | 84 +++ ext/impl/psb_s_cp_hll_from_coo.f90 | 74 +++ ext/impl/psb_s_cp_hll_from_fmt.f90 | 70 +++ ext/impl/psb_s_cp_hll_to_coo.f90 | 104 ++++ ext/impl/psb_s_cp_hll_to_fmt.f90 | 68 +++ ext/impl/psb_s_dia_aclsum.f90 | 87 +++ ext/impl/psb_s_dia_allocate_mnnz.f90 | 88 +++ ext/impl/psb_s_dia_arwsum.f90 | 87 +++ ext/impl/psb_s_dia_colsum.f90 | 87 +++ ext/impl/psb_s_dia_csgetptn.f90 | 188 ++++++ ext/impl/psb_s_dia_csgetrow.f90 | 199 +++++++ ext/impl/psb_s_dia_csmm.f90 | 134 +++++ ext/impl/psb_s_dia_csmv.f90 | 135 +++++ ext/impl/psb_s_dia_get_diag.f90 | 75 +++ ext/impl/psb_s_dia_maxval.f90 | 54 ++ ext/impl/psb_s_dia_mold.f90 | 61 ++ ext/impl/psb_s_dia_print.f90 | 148 +++++ ext/impl/psb_s_dia_reallocate_nz.f90 | 56 ++ ext/impl/psb_s_dia_reinit.f90 | 78 +++ ext/impl/psb_s_dia_rowsum.f90 | 87 +++ ext/impl/psb_s_dia_scal.f90 | 108 ++++ ext/impl/psb_s_dia_scals.f90 | 63 +++ ext/impl/psb_s_dns_mat_impl.f90 | 724 ++++++++++++++++++++++++ ext/impl/psb_s_ell_aclsum.f90 | 82 +++ ext/impl/psb_s_ell_allocate_mnnz.f90 | 91 +++ ext/impl/psb_s_ell_arwsum.f90 | 78 +++ ext/impl/psb_s_ell_colsum.f90 | 80 +++ ext/impl/psb_s_ell_csgetblk.f90 | 83 +++ ext/impl/psb_s_ell_csgetptn.f90 | 189 +++++++ ext/impl/psb_s_ell_csgetrow.f90 | 205 +++++++ ext/impl/psb_s_ell_csmm.f90 | 377 ++++++++++++ ext/impl/psb_s_ell_csmv.f90 | 433 ++++++++++++++ ext/impl/psb_s_ell_csnm1.f90 | 73 +++ ext/impl/psb_s_ell_csnmi.f90 | 58 ++ ext/impl/psb_s_ell_csput.f90 | 208 +++++++ ext/impl/psb_s_ell_cssm.f90 | 375 ++++++++++++ ext/impl/psb_s_ell_cssv.f90 | 372 ++++++++++++ ext/impl/psb_s_ell_get_diag.f90 | 77 +++ ext/impl/psb_s_ell_maxval.f90 | 60 ++ ext/impl/psb_s_ell_mold.f90 | 63 +++ ext/impl/psb_s_ell_print.f90 | 99 ++++ ext/impl/psb_s_ell_reallocate_nz.f90 | 66 +++ ext/impl/psb_s_ell_reinit.f90 | 77 +++ ext/impl/psb_s_ell_rowsum.f90 | 77 +++ ext/impl/psb_s_ell_scal.f90 | 99 ++++ ext/impl/psb_s_ell_scals.f90 | 63 +++ ext/impl/psb_s_ell_trim.f90 | 60 ++ ext/impl/psb_s_hdia_allocate_mnnz.f90 | 75 +++ ext/impl/psb_s_hdia_csmv.f90 | 162 ++++++ ext/impl/psb_s_hdia_mold.f90 | 63 +++ ext/impl/psb_s_hdia_print.f90 | 121 ++++ ext/impl/psb_s_hll_aclsum.f90 | 109 ++++ ext/impl/psb_s_hll_allocate_mnnz.f90 | 93 +++ ext/impl/psb_s_hll_arwsum.f90 | 108 ++++ ext/impl/psb_s_hll_colsum.f90 | 109 ++++ ext/impl/psb_s_hll_csgetblk.f90 | 83 +++ ext/impl/psb_s_hll_csgetptn.f90 | 209 +++++++ ext/impl/psb_s_hll_csgetrow.f90 | 221 ++++++++ ext/impl/psb_s_hll_csmm.f90 | 235 ++++++++ ext/impl/psb_s_hll_csmv.f90 | 563 ++++++++++++++++++ ext/impl/psb_s_hll_csnm1.f90 | 111 ++++ ext/impl/psb_s_hll_csnmi.f90 | 104 ++++ ext/impl/psb_s_hll_csput.f90 | 233 ++++++++ ext/impl/psb_s_hll_cssm.f90 | 506 +++++++++++++++++ ext/impl/psb_s_hll_cssv.f90 | 498 ++++++++++++++++ ext/impl/psb_s_hll_get_diag.f90 | 110 ++++ ext/impl/psb_s_hll_maxval.f90 | 45 ++ ext/impl/psb_s_hll_mold.f90 | 65 +++ ext/impl/psb_s_hll_print.f90 | 134 +++++ ext/impl/psb_s_hll_reallocate_nz.f90 | 64 +++ ext/impl/psb_s_hll_reinit.f90 | 77 +++ ext/impl/psb_s_hll_rowsum.f90 | 110 ++++ ext/impl/psb_s_hll_scal.f90 | 135 +++++ ext/impl/psb_s_hll_scals.f90 | 63 +++ ext/impl/psb_s_mv_dia_from_coo.f90 | 62 ++ ext/impl/psb_s_mv_dia_to_coo.f90 | 55 ++ ext/impl/psb_s_mv_ell_from_coo.f90 | 56 ++ ext/impl/psb_s_mv_ell_from_fmt.f90 | 67 +++ ext/impl/psb_s_mv_ell_to_coo.f90 | 89 +++ ext/impl/psb_s_mv_ell_to_fmt.f90 | 67 +++ ext/impl/psb_s_mv_hdia_from_coo.f90 | 60 ++ ext/impl/psb_s_mv_hdia_to_coo.f90 | 55 ++ ext/impl/psb_s_mv_hll_from_coo.f90 | 58 ++ ext/impl/psb_s_mv_hll_from_fmt.f90 | 70 +++ ext/impl/psb_s_mv_hll_to_coo.f90 | 56 ++ ext/impl/psb_s_mv_hll_to_fmt.f90 | 69 +++ ext/impl/psb_z_cp_dia_from_coo.f90 | 70 +++ ext/impl/psb_z_cp_dia_to_coo.f90 | 65 +++ ext/impl/psb_z_cp_ell_from_coo.f90 | 71 +++ ext/impl/psb_z_cp_ell_from_fmt.f90 | 65 +++ ext/impl/psb_z_cp_ell_to_coo.f90 | 69 +++ ext/impl/psb_z_cp_ell_to_fmt.f90 | 67 +++ ext/impl/psb_z_cp_hdia_from_coo.f90 | 222 ++++++++ ext/impl/psb_z_cp_hdia_to_coo.f90 | 84 +++ ext/impl/psb_z_cp_hll_from_coo.f90 | 74 +++ ext/impl/psb_z_cp_hll_from_fmt.f90 | 70 +++ ext/impl/psb_z_cp_hll_to_coo.f90 | 104 ++++ ext/impl/psb_z_cp_hll_to_fmt.f90 | 68 +++ ext/impl/psb_z_dia_aclsum.f90 | 87 +++ ext/impl/psb_z_dia_allocate_mnnz.f90 | 88 +++ ext/impl/psb_z_dia_arwsum.f90 | 87 +++ ext/impl/psb_z_dia_colsum.f90 | 87 +++ ext/impl/psb_z_dia_csgetptn.f90 | 188 ++++++ ext/impl/psb_z_dia_csgetrow.f90 | 199 +++++++ ext/impl/psb_z_dia_csmm.f90 | 134 +++++ ext/impl/psb_z_dia_csmv.f90 | 135 +++++ ext/impl/psb_z_dia_get_diag.f90 | 75 +++ ext/impl/psb_z_dia_maxval.f90 | 54 ++ ext/impl/psb_z_dia_mold.f90 | 61 ++ ext/impl/psb_z_dia_print.f90 | 148 +++++ ext/impl/psb_z_dia_reallocate_nz.f90 | 56 ++ ext/impl/psb_z_dia_reinit.f90 | 78 +++ ext/impl/psb_z_dia_rowsum.f90 | 87 +++ ext/impl/psb_z_dia_scal.f90 | 108 ++++ ext/impl/psb_z_dia_scals.f90 | 63 +++ ext/impl/psb_z_dns_mat_impl.f90 | 724 ++++++++++++++++++++++++ ext/impl/psb_z_ell_aclsum.f90 | 82 +++ ext/impl/psb_z_ell_allocate_mnnz.f90 | 91 +++ ext/impl/psb_z_ell_arwsum.f90 | 78 +++ ext/impl/psb_z_ell_colsum.f90 | 80 +++ ext/impl/psb_z_ell_csgetblk.f90 | 83 +++ ext/impl/psb_z_ell_csgetptn.f90 | 189 +++++++ ext/impl/psb_z_ell_csgetrow.f90 | 205 +++++++ ext/impl/psb_z_ell_csmm.f90 | 377 ++++++++++++ ext/impl/psb_z_ell_csmv.f90 | 433 ++++++++++++++ ext/impl/psb_z_ell_csnm1.f90 | 73 +++ ext/impl/psb_z_ell_csnmi.f90 | 58 ++ ext/impl/psb_z_ell_csput.f90 | 208 +++++++ ext/impl/psb_z_ell_cssm.f90 | 375 ++++++++++++ ext/impl/psb_z_ell_cssv.f90 | 372 ++++++++++++ ext/impl/psb_z_ell_get_diag.f90 | 77 +++ ext/impl/psb_z_ell_maxval.f90 | 60 ++ ext/impl/psb_z_ell_mold.f90 | 63 +++ ext/impl/psb_z_ell_print.f90 | 99 ++++ ext/impl/psb_z_ell_reallocate_nz.f90 | 66 +++ ext/impl/psb_z_ell_reinit.f90 | 77 +++ ext/impl/psb_z_ell_rowsum.f90 | 77 +++ ext/impl/psb_z_ell_scal.f90 | 99 ++++ ext/impl/psb_z_ell_scals.f90 | 63 +++ ext/impl/psb_z_ell_trim.f90 | 60 ++ ext/impl/psb_z_hdia_allocate_mnnz.f90 | 75 +++ ext/impl/psb_z_hdia_csmv.f90 | 162 ++++++ ext/impl/psb_z_hdia_mold.f90 | 63 +++ ext/impl/psb_z_hdia_print.f90 | 121 ++++ ext/impl/psb_z_hll_aclsum.f90 | 109 ++++ ext/impl/psb_z_hll_allocate_mnnz.f90 | 93 +++ ext/impl/psb_z_hll_arwsum.f90 | 108 ++++ ext/impl/psb_z_hll_colsum.f90 | 109 ++++ ext/impl/psb_z_hll_csgetblk.f90 | 83 +++ ext/impl/psb_z_hll_csgetptn.f90 | 209 +++++++ ext/impl/psb_z_hll_csgetrow.f90 | 221 ++++++++ ext/impl/psb_z_hll_csmm.f90 | 235 ++++++++ ext/impl/psb_z_hll_csmv.f90 | 563 ++++++++++++++++++ ext/impl/psb_z_hll_csnm1.f90 | 111 ++++ ext/impl/psb_z_hll_csnmi.f90 | 104 ++++ ext/impl/psb_z_hll_csput.f90 | 233 ++++++++ ext/impl/psb_z_hll_cssm.f90 | 506 +++++++++++++++++ ext/impl/psb_z_hll_cssv.f90 | 498 ++++++++++++++++ ext/impl/psb_z_hll_get_diag.f90 | 110 ++++ ext/impl/psb_z_hll_maxval.f90 | 45 ++ ext/impl/psb_z_hll_mold.f90 | 65 +++ ext/impl/psb_z_hll_print.f90 | 134 +++++ ext/impl/psb_z_hll_reallocate_nz.f90 | 64 +++ ext/impl/psb_z_hll_reinit.f90 | 77 +++ ext/impl/psb_z_hll_rowsum.f90 | 110 ++++ ext/impl/psb_z_hll_scal.f90 | 135 +++++ ext/impl/psb_z_hll_scals.f90 | 63 +++ ext/impl/psb_z_mv_dia_from_coo.f90 | 62 ++ ext/impl/psb_z_mv_dia_to_coo.f90 | 55 ++ ext/impl/psb_z_mv_ell_from_coo.f90 | 56 ++ ext/impl/psb_z_mv_ell_from_fmt.f90 | 67 +++ ext/impl/psb_z_mv_ell_to_coo.f90 | 89 +++ ext/impl/psb_z_mv_ell_to_fmt.f90 | 67 +++ ext/impl/psb_z_mv_hdia_from_coo.f90 | 60 ++ ext/impl/psb_z_mv_hdia_to_coo.f90 | 55 ++ ext/impl/psb_z_mv_hll_from_coo.f90 | 58 ++ ext/impl/psb_z_mv_hll_from_fmt.f90 | 70 +++ ext/impl/psb_z_mv_hll_to_coo.f90 | 56 ++ ext/impl/psb_z_mv_hll_to_fmt.f90 | 69 +++ ext/impl/psi_c_convert_dia_from_coo.f90 | 73 +++ ext/impl/psi_c_convert_ell_from_coo.f90 | 87 +++ ext/impl/psi_c_convert_hll_from_coo.f90 | 122 ++++ ext/impl/psi_c_xtr_coo_from_dia.f90 | 80 +++ ext/impl/psi_c_xtr_dia_from_coo.f90 | 69 +++ ext/impl/psi_c_xtr_ell_from_coo.f90 | 63 +++ ext/impl/psi_d_convert_dia_from_coo.f90 | 73 +++ ext/impl/psi_d_convert_ell_from_coo.f90 | 87 +++ ext/impl/psi_d_convert_hll_from_coo.f90 | 122 ++++ ext/impl/psi_d_xtr_coo_from_dia.f90 | 80 +++ ext/impl/psi_d_xtr_dia_from_coo.f90 | 69 +++ ext/impl/psi_d_xtr_ell_from_coo.f90 | 63 +++ ext/impl/psi_s_convert_dia_from_coo.f90 | 73 +++ ext/impl/psi_s_convert_ell_from_coo.f90 | 87 +++ ext/impl/psi_s_convert_hll_from_coo.f90 | 122 ++++ ext/impl/psi_s_xtr_coo_from_dia.f90 | 80 +++ ext/impl/psi_s_xtr_dia_from_coo.f90 | 69 +++ ext/impl/psi_s_xtr_ell_from_coo.f90 | 63 +++ ext/impl/psi_z_convert_dia_from_coo.f90 | 73 +++ ext/impl/psi_z_convert_ell_from_coo.f90 | 87 +++ ext/impl/psi_z_convert_hll_from_coo.f90 | 122 ++++ ext/impl/psi_z_xtr_coo_from_dia.f90 | 80 +++ ext/impl/psi_z_xtr_dia_from_coo.f90 | 69 +++ ext/impl/psi_z_xtr_ell_from_coo.f90 | 63 +++ ext/psb_c_dia_mat_mod.f90 | 513 +++++++++++++++++ ext/psb_c_dns_mat_mod.f90 | 467 +++++++++++++++ ext/psb_c_ell_mat_mod.f90 | 544 ++++++++++++++++++ ext/psb_c_hdia_mat_mod.f90 | 534 +++++++++++++++++ ext/psb_c_hll_mat_mod.f90 | 564 ++++++++++++++++++ ext/psb_d_dia_mat_mod.f90 | 513 +++++++++++++++++ ext/psb_d_dns_mat_mod.f90 | 467 +++++++++++++++ ext/psb_d_ell_mat_mod.f90 | 544 ++++++++++++++++++ ext/psb_d_hdia_mat_mod.f90 | 534 +++++++++++++++++ ext/psb_d_hll_mat_mod.f90 | 564 ++++++++++++++++++ ext/psb_ext_mod.F90 | 65 +++ ext/psb_s_dia_mat_mod.f90 | 513 +++++++++++++++++ ext/psb_s_dns_mat_mod.f90 | 467 +++++++++++++++ ext/psb_s_ell_mat_mod.f90 | 544 ++++++++++++++++++ ext/psb_s_hdia_mat_mod.f90 | 534 +++++++++++++++++ ext/psb_s_hll_mat_mod.f90 | 564 ++++++++++++++++++ ext/psb_z_dia_mat_mod.f90 | 513 +++++++++++++++++ ext/psb_z_dns_mat_mod.f90 | 467 +++++++++++++++ ext/psb_z_ell_mat_mod.f90 | 544 ++++++++++++++++++ ext/psb_z_hdia_mat_mod.f90 | 534 +++++++++++++++++ ext/psb_z_hll_mat_mod.f90 | 564 ++++++++++++++++++ ext/psi_c_ext_util_mod.f90 | 80 +++ ext/psi_d_ext_util_mod.f90 | 80 +++ ext/psi_ext_util_mod.f90 | 41 ++ ext/psi_i_ext_util_mod.f90 | 175 ++++++ ext/psi_s_ext_util_mod.f90 | 80 +++ ext/psi_z_ext_util_mod.f90 | 80 +++ 426 files changed, 61981 insertions(+), 2 deletions(-) create mode 100755 ext/Makefile create mode 100755 ext/impl/Makefile create mode 100644 ext/impl/psb_c_cp_dia_from_coo.f90 create mode 100644 ext/impl/psb_c_cp_dia_to_coo.f90 create mode 100644 ext/impl/psb_c_cp_ell_from_coo.f90 create mode 100644 ext/impl/psb_c_cp_ell_from_fmt.f90 create mode 100644 ext/impl/psb_c_cp_ell_to_coo.f90 create mode 100644 ext/impl/psb_c_cp_ell_to_fmt.f90 create mode 100644 ext/impl/psb_c_cp_hdia_from_coo.f90 create mode 100644 ext/impl/psb_c_cp_hdia_to_coo.f90 create mode 100644 ext/impl/psb_c_cp_hll_from_coo.f90 create mode 100644 ext/impl/psb_c_cp_hll_from_fmt.f90 create mode 100644 ext/impl/psb_c_cp_hll_to_coo.f90 create mode 100644 ext/impl/psb_c_cp_hll_to_fmt.f90 create mode 100644 ext/impl/psb_c_dia_aclsum.f90 create mode 100644 ext/impl/psb_c_dia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_c_dia_arwsum.f90 create mode 100644 ext/impl/psb_c_dia_colsum.f90 create mode 100644 ext/impl/psb_c_dia_csgetptn.f90 create mode 100644 ext/impl/psb_c_dia_csgetrow.f90 create mode 100644 ext/impl/psb_c_dia_csmm.f90 create mode 100644 ext/impl/psb_c_dia_csmv.f90 create mode 100644 ext/impl/psb_c_dia_get_diag.f90 create mode 100644 ext/impl/psb_c_dia_maxval.f90 create mode 100644 ext/impl/psb_c_dia_mold.f90 create mode 100644 ext/impl/psb_c_dia_print.f90 create mode 100644 ext/impl/psb_c_dia_reallocate_nz.f90 create mode 100644 ext/impl/psb_c_dia_reinit.f90 create mode 100644 ext/impl/psb_c_dia_rowsum.f90 create mode 100644 ext/impl/psb_c_dia_scal.f90 create mode 100644 ext/impl/psb_c_dia_scals.f90 create mode 100644 ext/impl/psb_c_dns_mat_impl.f90 create mode 100644 ext/impl/psb_c_ell_aclsum.f90 create mode 100644 ext/impl/psb_c_ell_allocate_mnnz.f90 create mode 100644 ext/impl/psb_c_ell_arwsum.f90 create mode 100644 ext/impl/psb_c_ell_colsum.f90 create mode 100644 ext/impl/psb_c_ell_csgetblk.f90 create mode 100644 ext/impl/psb_c_ell_csgetptn.f90 create mode 100644 ext/impl/psb_c_ell_csgetrow.f90 create mode 100644 ext/impl/psb_c_ell_csmm.f90 create mode 100644 ext/impl/psb_c_ell_csmv.f90 create mode 100644 ext/impl/psb_c_ell_csnm1.f90 create mode 100644 ext/impl/psb_c_ell_csnmi.f90 create mode 100644 ext/impl/psb_c_ell_csput.f90 create mode 100644 ext/impl/psb_c_ell_cssm.f90 create mode 100644 ext/impl/psb_c_ell_cssv.f90 create mode 100644 ext/impl/psb_c_ell_get_diag.f90 create mode 100644 ext/impl/psb_c_ell_maxval.f90 create mode 100644 ext/impl/psb_c_ell_mold.f90 create mode 100644 ext/impl/psb_c_ell_print.f90 create mode 100644 ext/impl/psb_c_ell_reallocate_nz.f90 create mode 100644 ext/impl/psb_c_ell_reinit.f90 create mode 100644 ext/impl/psb_c_ell_rowsum.f90 create mode 100644 ext/impl/psb_c_ell_scal.f90 create mode 100644 ext/impl/psb_c_ell_scals.f90 create mode 100644 ext/impl/psb_c_ell_trim.f90 create mode 100644 ext/impl/psb_c_hdia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_c_hdia_csmv.f90 create mode 100644 ext/impl/psb_c_hdia_mold.f90 create mode 100644 ext/impl/psb_c_hdia_print.f90 create mode 100644 ext/impl/psb_c_hll_aclsum.f90 create mode 100644 ext/impl/psb_c_hll_allocate_mnnz.f90 create mode 100644 ext/impl/psb_c_hll_arwsum.f90 create mode 100644 ext/impl/psb_c_hll_colsum.f90 create mode 100644 ext/impl/psb_c_hll_csgetblk.f90 create mode 100644 ext/impl/psb_c_hll_csgetptn.f90 create mode 100644 ext/impl/psb_c_hll_csgetrow.f90 create mode 100644 ext/impl/psb_c_hll_csmm.f90 create mode 100644 ext/impl/psb_c_hll_csmv.f90 create mode 100644 ext/impl/psb_c_hll_csnm1.f90 create mode 100644 ext/impl/psb_c_hll_csnmi.f90 create mode 100644 ext/impl/psb_c_hll_csput.f90 create mode 100644 ext/impl/psb_c_hll_cssm.f90 create mode 100644 ext/impl/psb_c_hll_cssv.f90 create mode 100644 ext/impl/psb_c_hll_get_diag.f90 create mode 100644 ext/impl/psb_c_hll_maxval.f90 create mode 100644 ext/impl/psb_c_hll_mold.f90 create mode 100644 ext/impl/psb_c_hll_print.f90 create mode 100644 ext/impl/psb_c_hll_reallocate_nz.f90 create mode 100644 ext/impl/psb_c_hll_reinit.f90 create mode 100644 ext/impl/psb_c_hll_rowsum.f90 create mode 100644 ext/impl/psb_c_hll_scal.f90 create mode 100644 ext/impl/psb_c_hll_scals.f90 create mode 100644 ext/impl/psb_c_mv_dia_from_coo.f90 create mode 100644 ext/impl/psb_c_mv_dia_to_coo.f90 create mode 100644 ext/impl/psb_c_mv_ell_from_coo.f90 create mode 100644 ext/impl/psb_c_mv_ell_from_fmt.f90 create mode 100644 ext/impl/psb_c_mv_ell_to_coo.f90 create mode 100644 ext/impl/psb_c_mv_ell_to_fmt.f90 create mode 100644 ext/impl/psb_c_mv_hdia_from_coo.f90 create mode 100644 ext/impl/psb_c_mv_hdia_to_coo.f90 create mode 100644 ext/impl/psb_c_mv_hll_from_coo.f90 create mode 100644 ext/impl/psb_c_mv_hll_from_fmt.f90 create mode 100644 ext/impl/psb_c_mv_hll_to_coo.f90 create mode 100644 ext/impl/psb_c_mv_hll_to_fmt.f90 create mode 100644 ext/impl/psb_d_cp_dia_from_coo.f90 create mode 100644 ext/impl/psb_d_cp_dia_to_coo.f90 create mode 100644 ext/impl/psb_d_cp_ell_from_coo.f90 create mode 100644 ext/impl/psb_d_cp_ell_from_fmt.f90 create mode 100644 ext/impl/psb_d_cp_ell_to_coo.f90 create mode 100644 ext/impl/psb_d_cp_ell_to_fmt.f90 create mode 100644 ext/impl/psb_d_cp_hdia_from_coo.f90 create mode 100644 ext/impl/psb_d_cp_hdia_to_coo.f90 create mode 100644 ext/impl/psb_d_cp_hll_from_coo.f90 create mode 100644 ext/impl/psb_d_cp_hll_from_fmt.f90 create mode 100644 ext/impl/psb_d_cp_hll_to_coo.f90 create mode 100644 ext/impl/psb_d_cp_hll_to_fmt.f90 create mode 100644 ext/impl/psb_d_dia_aclsum.f90 create mode 100644 ext/impl/psb_d_dia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_d_dia_arwsum.f90 create mode 100644 ext/impl/psb_d_dia_colsum.f90 create mode 100644 ext/impl/psb_d_dia_csgetptn.f90 create mode 100644 ext/impl/psb_d_dia_csgetrow.f90 create mode 100644 ext/impl/psb_d_dia_csmm.f90 create mode 100644 ext/impl/psb_d_dia_csmv.f90 create mode 100644 ext/impl/psb_d_dia_get_diag.f90 create mode 100644 ext/impl/psb_d_dia_maxval.f90 create mode 100644 ext/impl/psb_d_dia_mold.f90 create mode 100644 ext/impl/psb_d_dia_print.f90 create mode 100644 ext/impl/psb_d_dia_reallocate_nz.f90 create mode 100644 ext/impl/psb_d_dia_reinit.f90 create mode 100644 ext/impl/psb_d_dia_rowsum.f90 create mode 100644 ext/impl/psb_d_dia_scal.f90 create mode 100644 ext/impl/psb_d_dia_scals.f90 create mode 100644 ext/impl/psb_d_dns_mat_impl.f90 create mode 100644 ext/impl/psb_d_ell_aclsum.f90 create mode 100644 ext/impl/psb_d_ell_allocate_mnnz.f90 create mode 100644 ext/impl/psb_d_ell_arwsum.f90 create mode 100644 ext/impl/psb_d_ell_colsum.f90 create mode 100644 ext/impl/psb_d_ell_csgetblk.f90 create mode 100644 ext/impl/psb_d_ell_csgetptn.f90 create mode 100644 ext/impl/psb_d_ell_csgetrow.f90 create mode 100644 ext/impl/psb_d_ell_csmm.f90 create mode 100644 ext/impl/psb_d_ell_csmv.f90 create mode 100644 ext/impl/psb_d_ell_csnm1.f90 create mode 100644 ext/impl/psb_d_ell_csnmi.f90 create mode 100644 ext/impl/psb_d_ell_csput.f90 create mode 100644 ext/impl/psb_d_ell_cssm.f90 create mode 100644 ext/impl/psb_d_ell_cssv.f90 create mode 100644 ext/impl/psb_d_ell_get_diag.f90 create mode 100644 ext/impl/psb_d_ell_maxval.f90 create mode 100644 ext/impl/psb_d_ell_mold.f90 create mode 100644 ext/impl/psb_d_ell_print.f90 create mode 100644 ext/impl/psb_d_ell_reallocate_nz.f90 create mode 100644 ext/impl/psb_d_ell_reinit.f90 create mode 100644 ext/impl/psb_d_ell_rowsum.f90 create mode 100644 ext/impl/psb_d_ell_scal.f90 create mode 100644 ext/impl/psb_d_ell_scals.f90 create mode 100644 ext/impl/psb_d_ell_trim.f90 create mode 100644 ext/impl/psb_d_hdia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_d_hdia_csmv.f90 create mode 100644 ext/impl/psb_d_hdia_mold.f90 create mode 100644 ext/impl/psb_d_hdia_print.f90 create mode 100644 ext/impl/psb_d_hll_aclsum.f90 create mode 100644 ext/impl/psb_d_hll_allocate_mnnz.f90 create mode 100644 ext/impl/psb_d_hll_arwsum.f90 create mode 100644 ext/impl/psb_d_hll_colsum.f90 create mode 100644 ext/impl/psb_d_hll_csgetblk.f90 create mode 100644 ext/impl/psb_d_hll_csgetptn.f90 create mode 100644 ext/impl/psb_d_hll_csgetrow.f90 create mode 100644 ext/impl/psb_d_hll_csmm.f90 create mode 100644 ext/impl/psb_d_hll_csmv.f90 create mode 100644 ext/impl/psb_d_hll_csnm1.f90 create mode 100644 ext/impl/psb_d_hll_csnmi.f90 create mode 100644 ext/impl/psb_d_hll_csput.f90 create mode 100644 ext/impl/psb_d_hll_cssm.f90 create mode 100644 ext/impl/psb_d_hll_cssv.f90 create mode 100644 ext/impl/psb_d_hll_get_diag.f90 create mode 100644 ext/impl/psb_d_hll_maxval.f90 create mode 100644 ext/impl/psb_d_hll_mold.f90 create mode 100644 ext/impl/psb_d_hll_print.f90 create mode 100644 ext/impl/psb_d_hll_reallocate_nz.f90 create mode 100644 ext/impl/psb_d_hll_reinit.f90 create mode 100644 ext/impl/psb_d_hll_rowsum.f90 create mode 100644 ext/impl/psb_d_hll_scal.f90 create mode 100644 ext/impl/psb_d_hll_scals.f90 create mode 100644 ext/impl/psb_d_mv_dia_from_coo.f90 create mode 100644 ext/impl/psb_d_mv_dia_to_coo.f90 create mode 100644 ext/impl/psb_d_mv_ell_from_coo.f90 create mode 100644 ext/impl/psb_d_mv_ell_from_fmt.f90 create mode 100644 ext/impl/psb_d_mv_ell_to_coo.f90 create mode 100644 ext/impl/psb_d_mv_ell_to_fmt.f90 create mode 100644 ext/impl/psb_d_mv_hdia_from_coo.f90 create mode 100644 ext/impl/psb_d_mv_hdia_to_coo.f90 create mode 100644 ext/impl/psb_d_mv_hll_from_coo.f90 create mode 100644 ext/impl/psb_d_mv_hll_from_fmt.f90 create mode 100644 ext/impl/psb_d_mv_hll_to_coo.f90 create mode 100644 ext/impl/psb_d_mv_hll_to_fmt.f90 create mode 100644 ext/impl/psb_s_cp_dia_from_coo.f90 create mode 100644 ext/impl/psb_s_cp_dia_to_coo.f90 create mode 100644 ext/impl/psb_s_cp_ell_from_coo.f90 create mode 100644 ext/impl/psb_s_cp_ell_from_fmt.f90 create mode 100644 ext/impl/psb_s_cp_ell_to_coo.f90 create mode 100644 ext/impl/psb_s_cp_ell_to_fmt.f90 create mode 100644 ext/impl/psb_s_cp_hdia_from_coo.f90 create mode 100644 ext/impl/psb_s_cp_hdia_to_coo.f90 create mode 100644 ext/impl/psb_s_cp_hll_from_coo.f90 create mode 100644 ext/impl/psb_s_cp_hll_from_fmt.f90 create mode 100644 ext/impl/psb_s_cp_hll_to_coo.f90 create mode 100644 ext/impl/psb_s_cp_hll_to_fmt.f90 create mode 100644 ext/impl/psb_s_dia_aclsum.f90 create mode 100644 ext/impl/psb_s_dia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_s_dia_arwsum.f90 create mode 100644 ext/impl/psb_s_dia_colsum.f90 create mode 100644 ext/impl/psb_s_dia_csgetptn.f90 create mode 100644 ext/impl/psb_s_dia_csgetrow.f90 create mode 100644 ext/impl/psb_s_dia_csmm.f90 create mode 100644 ext/impl/psb_s_dia_csmv.f90 create mode 100644 ext/impl/psb_s_dia_get_diag.f90 create mode 100644 ext/impl/psb_s_dia_maxval.f90 create mode 100644 ext/impl/psb_s_dia_mold.f90 create mode 100644 ext/impl/psb_s_dia_print.f90 create mode 100644 ext/impl/psb_s_dia_reallocate_nz.f90 create mode 100644 ext/impl/psb_s_dia_reinit.f90 create mode 100644 ext/impl/psb_s_dia_rowsum.f90 create mode 100644 ext/impl/psb_s_dia_scal.f90 create mode 100644 ext/impl/psb_s_dia_scals.f90 create mode 100644 ext/impl/psb_s_dns_mat_impl.f90 create mode 100644 ext/impl/psb_s_ell_aclsum.f90 create mode 100644 ext/impl/psb_s_ell_allocate_mnnz.f90 create mode 100644 ext/impl/psb_s_ell_arwsum.f90 create mode 100644 ext/impl/psb_s_ell_colsum.f90 create mode 100644 ext/impl/psb_s_ell_csgetblk.f90 create mode 100644 ext/impl/psb_s_ell_csgetptn.f90 create mode 100644 ext/impl/psb_s_ell_csgetrow.f90 create mode 100644 ext/impl/psb_s_ell_csmm.f90 create mode 100644 ext/impl/psb_s_ell_csmv.f90 create mode 100644 ext/impl/psb_s_ell_csnm1.f90 create mode 100644 ext/impl/psb_s_ell_csnmi.f90 create mode 100644 ext/impl/psb_s_ell_csput.f90 create mode 100644 ext/impl/psb_s_ell_cssm.f90 create mode 100644 ext/impl/psb_s_ell_cssv.f90 create mode 100644 ext/impl/psb_s_ell_get_diag.f90 create mode 100644 ext/impl/psb_s_ell_maxval.f90 create mode 100644 ext/impl/psb_s_ell_mold.f90 create mode 100644 ext/impl/psb_s_ell_print.f90 create mode 100644 ext/impl/psb_s_ell_reallocate_nz.f90 create mode 100644 ext/impl/psb_s_ell_reinit.f90 create mode 100644 ext/impl/psb_s_ell_rowsum.f90 create mode 100644 ext/impl/psb_s_ell_scal.f90 create mode 100644 ext/impl/psb_s_ell_scals.f90 create mode 100644 ext/impl/psb_s_ell_trim.f90 create mode 100644 ext/impl/psb_s_hdia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_s_hdia_csmv.f90 create mode 100644 ext/impl/psb_s_hdia_mold.f90 create mode 100644 ext/impl/psb_s_hdia_print.f90 create mode 100644 ext/impl/psb_s_hll_aclsum.f90 create mode 100644 ext/impl/psb_s_hll_allocate_mnnz.f90 create mode 100644 ext/impl/psb_s_hll_arwsum.f90 create mode 100644 ext/impl/psb_s_hll_colsum.f90 create mode 100644 ext/impl/psb_s_hll_csgetblk.f90 create mode 100644 ext/impl/psb_s_hll_csgetptn.f90 create mode 100644 ext/impl/psb_s_hll_csgetrow.f90 create mode 100644 ext/impl/psb_s_hll_csmm.f90 create mode 100644 ext/impl/psb_s_hll_csmv.f90 create mode 100644 ext/impl/psb_s_hll_csnm1.f90 create mode 100644 ext/impl/psb_s_hll_csnmi.f90 create mode 100644 ext/impl/psb_s_hll_csput.f90 create mode 100644 ext/impl/psb_s_hll_cssm.f90 create mode 100644 ext/impl/psb_s_hll_cssv.f90 create mode 100644 ext/impl/psb_s_hll_get_diag.f90 create mode 100644 ext/impl/psb_s_hll_maxval.f90 create mode 100644 ext/impl/psb_s_hll_mold.f90 create mode 100644 ext/impl/psb_s_hll_print.f90 create mode 100644 ext/impl/psb_s_hll_reallocate_nz.f90 create mode 100644 ext/impl/psb_s_hll_reinit.f90 create mode 100644 ext/impl/psb_s_hll_rowsum.f90 create mode 100644 ext/impl/psb_s_hll_scal.f90 create mode 100644 ext/impl/psb_s_hll_scals.f90 create mode 100644 ext/impl/psb_s_mv_dia_from_coo.f90 create mode 100644 ext/impl/psb_s_mv_dia_to_coo.f90 create mode 100644 ext/impl/psb_s_mv_ell_from_coo.f90 create mode 100644 ext/impl/psb_s_mv_ell_from_fmt.f90 create mode 100644 ext/impl/psb_s_mv_ell_to_coo.f90 create mode 100644 ext/impl/psb_s_mv_ell_to_fmt.f90 create mode 100644 ext/impl/psb_s_mv_hdia_from_coo.f90 create mode 100644 ext/impl/psb_s_mv_hdia_to_coo.f90 create mode 100644 ext/impl/psb_s_mv_hll_from_coo.f90 create mode 100644 ext/impl/psb_s_mv_hll_from_fmt.f90 create mode 100644 ext/impl/psb_s_mv_hll_to_coo.f90 create mode 100644 ext/impl/psb_s_mv_hll_to_fmt.f90 create mode 100644 ext/impl/psb_z_cp_dia_from_coo.f90 create mode 100644 ext/impl/psb_z_cp_dia_to_coo.f90 create mode 100644 ext/impl/psb_z_cp_ell_from_coo.f90 create mode 100644 ext/impl/psb_z_cp_ell_from_fmt.f90 create mode 100644 ext/impl/psb_z_cp_ell_to_coo.f90 create mode 100644 ext/impl/psb_z_cp_ell_to_fmt.f90 create mode 100644 ext/impl/psb_z_cp_hdia_from_coo.f90 create mode 100644 ext/impl/psb_z_cp_hdia_to_coo.f90 create mode 100644 ext/impl/psb_z_cp_hll_from_coo.f90 create mode 100644 ext/impl/psb_z_cp_hll_from_fmt.f90 create mode 100644 ext/impl/psb_z_cp_hll_to_coo.f90 create mode 100644 ext/impl/psb_z_cp_hll_to_fmt.f90 create mode 100644 ext/impl/psb_z_dia_aclsum.f90 create mode 100644 ext/impl/psb_z_dia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_z_dia_arwsum.f90 create mode 100644 ext/impl/psb_z_dia_colsum.f90 create mode 100644 ext/impl/psb_z_dia_csgetptn.f90 create mode 100644 ext/impl/psb_z_dia_csgetrow.f90 create mode 100644 ext/impl/psb_z_dia_csmm.f90 create mode 100644 ext/impl/psb_z_dia_csmv.f90 create mode 100644 ext/impl/psb_z_dia_get_diag.f90 create mode 100644 ext/impl/psb_z_dia_maxval.f90 create mode 100644 ext/impl/psb_z_dia_mold.f90 create mode 100644 ext/impl/psb_z_dia_print.f90 create mode 100644 ext/impl/psb_z_dia_reallocate_nz.f90 create mode 100644 ext/impl/psb_z_dia_reinit.f90 create mode 100644 ext/impl/psb_z_dia_rowsum.f90 create mode 100644 ext/impl/psb_z_dia_scal.f90 create mode 100644 ext/impl/psb_z_dia_scals.f90 create mode 100644 ext/impl/psb_z_dns_mat_impl.f90 create mode 100644 ext/impl/psb_z_ell_aclsum.f90 create mode 100644 ext/impl/psb_z_ell_allocate_mnnz.f90 create mode 100644 ext/impl/psb_z_ell_arwsum.f90 create mode 100644 ext/impl/psb_z_ell_colsum.f90 create mode 100644 ext/impl/psb_z_ell_csgetblk.f90 create mode 100644 ext/impl/psb_z_ell_csgetptn.f90 create mode 100644 ext/impl/psb_z_ell_csgetrow.f90 create mode 100644 ext/impl/psb_z_ell_csmm.f90 create mode 100644 ext/impl/psb_z_ell_csmv.f90 create mode 100644 ext/impl/psb_z_ell_csnm1.f90 create mode 100644 ext/impl/psb_z_ell_csnmi.f90 create mode 100644 ext/impl/psb_z_ell_csput.f90 create mode 100644 ext/impl/psb_z_ell_cssm.f90 create mode 100644 ext/impl/psb_z_ell_cssv.f90 create mode 100644 ext/impl/psb_z_ell_get_diag.f90 create mode 100644 ext/impl/psb_z_ell_maxval.f90 create mode 100644 ext/impl/psb_z_ell_mold.f90 create mode 100644 ext/impl/psb_z_ell_print.f90 create mode 100644 ext/impl/psb_z_ell_reallocate_nz.f90 create mode 100644 ext/impl/psb_z_ell_reinit.f90 create mode 100644 ext/impl/psb_z_ell_rowsum.f90 create mode 100644 ext/impl/psb_z_ell_scal.f90 create mode 100644 ext/impl/psb_z_ell_scals.f90 create mode 100644 ext/impl/psb_z_ell_trim.f90 create mode 100644 ext/impl/psb_z_hdia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_z_hdia_csmv.f90 create mode 100644 ext/impl/psb_z_hdia_mold.f90 create mode 100644 ext/impl/psb_z_hdia_print.f90 create mode 100644 ext/impl/psb_z_hll_aclsum.f90 create mode 100644 ext/impl/psb_z_hll_allocate_mnnz.f90 create mode 100644 ext/impl/psb_z_hll_arwsum.f90 create mode 100644 ext/impl/psb_z_hll_colsum.f90 create mode 100644 ext/impl/psb_z_hll_csgetblk.f90 create mode 100644 ext/impl/psb_z_hll_csgetptn.f90 create mode 100644 ext/impl/psb_z_hll_csgetrow.f90 create mode 100644 ext/impl/psb_z_hll_csmm.f90 create mode 100644 ext/impl/psb_z_hll_csmv.f90 create mode 100644 ext/impl/psb_z_hll_csnm1.f90 create mode 100644 ext/impl/psb_z_hll_csnmi.f90 create mode 100644 ext/impl/psb_z_hll_csput.f90 create mode 100644 ext/impl/psb_z_hll_cssm.f90 create mode 100644 ext/impl/psb_z_hll_cssv.f90 create mode 100644 ext/impl/psb_z_hll_get_diag.f90 create mode 100644 ext/impl/psb_z_hll_maxval.f90 create mode 100644 ext/impl/psb_z_hll_mold.f90 create mode 100644 ext/impl/psb_z_hll_print.f90 create mode 100644 ext/impl/psb_z_hll_reallocate_nz.f90 create mode 100644 ext/impl/psb_z_hll_reinit.f90 create mode 100644 ext/impl/psb_z_hll_rowsum.f90 create mode 100644 ext/impl/psb_z_hll_scal.f90 create mode 100644 ext/impl/psb_z_hll_scals.f90 create mode 100644 ext/impl/psb_z_mv_dia_from_coo.f90 create mode 100644 ext/impl/psb_z_mv_dia_to_coo.f90 create mode 100644 ext/impl/psb_z_mv_ell_from_coo.f90 create mode 100644 ext/impl/psb_z_mv_ell_from_fmt.f90 create mode 100644 ext/impl/psb_z_mv_ell_to_coo.f90 create mode 100644 ext/impl/psb_z_mv_ell_to_fmt.f90 create mode 100644 ext/impl/psb_z_mv_hdia_from_coo.f90 create mode 100644 ext/impl/psb_z_mv_hdia_to_coo.f90 create mode 100644 ext/impl/psb_z_mv_hll_from_coo.f90 create mode 100644 ext/impl/psb_z_mv_hll_from_fmt.f90 create mode 100644 ext/impl/psb_z_mv_hll_to_coo.f90 create mode 100644 ext/impl/psb_z_mv_hll_to_fmt.f90 create mode 100644 ext/impl/psi_c_convert_dia_from_coo.f90 create mode 100644 ext/impl/psi_c_convert_ell_from_coo.f90 create mode 100644 ext/impl/psi_c_convert_hll_from_coo.f90 create mode 100644 ext/impl/psi_c_xtr_coo_from_dia.f90 create mode 100644 ext/impl/psi_c_xtr_dia_from_coo.f90 create mode 100644 ext/impl/psi_c_xtr_ell_from_coo.f90 create mode 100644 ext/impl/psi_d_convert_dia_from_coo.f90 create mode 100644 ext/impl/psi_d_convert_ell_from_coo.f90 create mode 100644 ext/impl/psi_d_convert_hll_from_coo.f90 create mode 100644 ext/impl/psi_d_xtr_coo_from_dia.f90 create mode 100644 ext/impl/psi_d_xtr_dia_from_coo.f90 create mode 100644 ext/impl/psi_d_xtr_ell_from_coo.f90 create mode 100644 ext/impl/psi_s_convert_dia_from_coo.f90 create mode 100644 ext/impl/psi_s_convert_ell_from_coo.f90 create mode 100644 ext/impl/psi_s_convert_hll_from_coo.f90 create mode 100644 ext/impl/psi_s_xtr_coo_from_dia.f90 create mode 100644 ext/impl/psi_s_xtr_dia_from_coo.f90 create mode 100644 ext/impl/psi_s_xtr_ell_from_coo.f90 create mode 100644 ext/impl/psi_z_convert_dia_from_coo.f90 create mode 100644 ext/impl/psi_z_convert_ell_from_coo.f90 create mode 100644 ext/impl/psi_z_convert_hll_from_coo.f90 create mode 100644 ext/impl/psi_z_xtr_coo_from_dia.f90 create mode 100644 ext/impl/psi_z_xtr_dia_from_coo.f90 create mode 100644 ext/impl/psi_z_xtr_ell_from_coo.f90 create mode 100644 ext/psb_c_dia_mat_mod.f90 create mode 100644 ext/psb_c_dns_mat_mod.f90 create mode 100644 ext/psb_c_ell_mat_mod.f90 create mode 100644 ext/psb_c_hdia_mat_mod.f90 create mode 100644 ext/psb_c_hll_mat_mod.f90 create mode 100644 ext/psb_d_dia_mat_mod.f90 create mode 100644 ext/psb_d_dns_mat_mod.f90 create mode 100644 ext/psb_d_ell_mat_mod.f90 create mode 100644 ext/psb_d_hdia_mat_mod.f90 create mode 100644 ext/psb_d_hll_mat_mod.f90 create mode 100644 ext/psb_ext_mod.F90 create mode 100644 ext/psb_s_dia_mat_mod.f90 create mode 100644 ext/psb_s_dns_mat_mod.f90 create mode 100644 ext/psb_s_ell_mat_mod.f90 create mode 100644 ext/psb_s_hdia_mat_mod.f90 create mode 100644 ext/psb_s_hll_mat_mod.f90 create mode 100644 ext/psb_z_dia_mat_mod.f90 create mode 100644 ext/psb_z_dns_mat_mod.f90 create mode 100644 ext/psb_z_ell_mat_mod.f90 create mode 100644 ext/psb_z_hdia_mat_mod.f90 create mode 100644 ext/psb_z_hll_mat_mod.f90 create mode 100644 ext/psi_c_ext_util_mod.f90 create mode 100644 ext/psi_d_ext_util_mod.f90 create mode 100644 ext/psi_ext_util_mod.f90 create mode 100644 ext/psi_i_ext_util_mod.f90 create mode 100644 ext/psi_s_ext_util_mod.f90 create mode 100644 ext/psi_z_ext_util_mod.f90 diff --git a/Makefile b/Makefile index a0f5ec3e..4a79afce 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include Make.inc -all: dirs based precd kryld utild cbindd libd +all: dirs based precd kryld utild cbindd extd libd @echo "=====================================" @echo "PSBLAS libraries Compilation Successful." @@ -12,15 +12,17 @@ dirs: precd: based utild: based kryld: precd +extd: based cbindd: based precd kryld utild -libd: based precd kryld utild cbindd +libd: based precd kryld utild cbindd extd $(MAKE) -C base lib $(MAKE) -C prec lib $(MAKE) -C krylov lib $(MAKE) -C util lib $(MAKE) -C cbind lib + $(MAKE) -C ext lib based: $(MAKE) -C base objs @@ -32,6 +34,8 @@ utild: $(MAKE) -C util objs cbindd: $(MAKE) -C cbind objs +extd: + $(MAKE) -C ext objs install: all @@ -56,6 +60,7 @@ clean: $(MAKE) -C krylov clean $(MAKE) -C util clean $(MAKE) -C cbind clean + $(MAKE) -C ext clean check: all make check -C test/serial @@ -71,6 +76,7 @@ veryclean: cleanlib cd krylov && $(MAKE) veryclean cd util && $(MAKE) veryclean cd cbind && $(MAKE) veryclean + cd ext && $(MAKE) veryclean cd test/fileread && $(MAKE) clean cd test/pargen && $(MAKE) clean cd test/util && $(MAKE) clean diff --git a/ext/Makefile b/ext/Makefile new file mode 100755 index 00000000..36b82433 --- /dev/null +++ b/ext/Makefile @@ -0,0 +1,84 @@ +include ../Make.inc +# +# Libraries used +# +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). +CINCLUDES= +LIBNAME=libpsb_ext.a + + +FOBJS= psb_d_ell_mat_mod.o psb_d_hll_mat_mod.o \ + psb_s_hll_mat_mod.o psb_s_ell_mat_mod.o \ + psb_c_hll_mat_mod.o psb_c_ell_mat_mod.o \ + psb_z_hll_mat_mod.o psb_z_ell_mat_mod.o \ + psb_d_dia_mat_mod.o psb_d_hdia_mat_mod.o \ + psb_s_dia_mat_mod.o psb_s_hdia_mat_mod.o \ + psb_c_dia_mat_mod.o psb_c_hdia_mat_mod.o \ + psb_z_dia_mat_mod.o psb_z_hdia_mat_mod.o \ + psb_s_dns_mat_mod.o psb_d_dns_mat_mod.o \ + psb_c_dns_mat_mod.o psb_z_dns_mat_mod.o \ + psi_ext_util_mod.o psi_i_ext_util_mod.o \ + psi_s_ext_util_mod.o psi_c_ext_util_mod.o \ + psi_d_ext_util_mod.o psi_z_ext_util_mod.o \ + psb_ext_mod.o + +COBJS= + +OBJS=$(COBJS) $(FOBJS) + +lib: objs ilib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +objs: $(OBJS) iobjs + /bin/cp -p *$(.mod) $(MODDIR) + + + +psb_ext_mod.o: psb_s_dia_mat_mod.o psb_d_dia_mat_mod.o \ + psb_c_dia_mat_mod.o psb_z_dia_mat_mod.o \ + psb_d_ell_mat_mod.o psb_d_hll_mat_mod.o \ + psb_s_hll_mat_mod.o psb_s_ell_mat_mod.o \ + psb_c_hll_mat_mod.o psb_c_ell_mat_mod.o \ + psb_z_hll_mat_mod.o psb_z_ell_mat_mod.o \ + psb_s_hdia_mat_mod.o psb_d_hdia_mat_mod.o \ + psb_c_hdia_mat_mod.o psb_z_hdia_mat_mod.o \ + psb_s_dns_mat_mod.o psb_d_dns_mat_mod.o \ + psb_c_dns_mat_mod.o psb_z_dns_mat_mod.o + +# psb_d_rsb_mat_mod.o psb_d_hdia_mat_mod.o +psi_ext_util_mod.o: psi_i_ext_util_mod.o \ + psi_s_ext_util_mod.o psi_c_ext_util_mod.o \ + psi_d_ext_util_mod.o psi_z_ext_util_mod.o + +psb_s_dia_mat_mod.o psb_c_dia_mat_mod.o psb_d_dia_mat_mod.o psb_z_dia_mat_mod.o: psi_ext_util_mod.o +psb_s_hdia_mat_mod.o psb_c_hdia_mat_mod.o psb_d_hdia_mat_mod.o psb_z_hdia_mat_mod.o: psi_ext_util_mod.o +psb_s_hll_mat_mod.o psb_c_hll_mat_mod.o psb_d_hll_mat_mod.o psb_z_hll_mat_mod.o: psi_ext_util_mod.o + +ilib: objs + $(MAKE) -C impl lib LIBNAME=$(LIBNAME) + +iobjs: $(OBJS) + $(MAKE) -C impl objs + +clean: cclean iclean + /bin/rm -f $(FOBJS) *$(.mod) *.a + +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean + +veryclean: clean + /bin/rm -f $(HERE)/$(LIBNAME) $(LIBMOD) *$(.mod) + + + + diff --git a/ext/impl/Makefile b/ext/impl/Makefile new file mode 100755 index 00000000..57593a54 --- /dev/null +++ b/ext/impl/Makefile @@ -0,0 +1,412 @@ +include ../../Make.inc +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. +LIBNAME=libpsb_ext.a + +OBJS= \ +psb_s_cp_dia_from_coo.o \ +psb_s_cp_dia_to_coo.o \ +psb_s_cp_ell_from_coo.o \ +psb_s_cp_ell_from_fmt.o \ +psb_s_cp_ell_to_coo.o \ +psb_s_cp_ell_to_fmt.o \ +psb_s_cp_hdia_from_coo.o \ +psb_s_cp_hdia_to_coo.o \ +psb_s_cp_hll_from_coo.o \ +psb_s_cp_hll_from_fmt.o \ +psb_s_cp_hll_to_coo.o \ +psb_s_cp_hll_to_fmt.o \ +psb_s_dia_aclsum.o \ +psb_s_dia_allocate_mnnz.o \ +psb_s_dia_arwsum.o \ +psb_s_dia_colsum.o \ +psb_s_dia_csgetptn.o \ +psb_s_dia_csgetrow.o \ +psb_s_dia_csmm.o \ +psb_s_dia_csmv.o \ +psb_s_dia_get_diag.o \ +psb_s_dia_maxval.o \ +psb_s_dia_mold.o \ +psb_s_dia_print.o \ +psb_s_dia_reallocate_nz.o \ +psb_s_dia_reinit.o \ +psb_s_dia_rowsum.o \ +psb_s_dia_scal.o \ +psb_s_dia_scals.o \ +psb_s_ell_aclsum.o \ +psb_s_ell_allocate_mnnz.o \ +psb_s_ell_arwsum.o \ +psb_s_ell_colsum.o \ +psb_s_ell_csgetblk.o \ +psb_s_ell_csgetptn.o \ +psb_s_ell_csgetrow.o \ +psb_s_ell_csmm.o \ +psb_s_ell_csmv.o \ +psb_s_ell_csnm1.o \ +psb_s_ell_csnmi.o \ +psb_s_ell_csput.o \ +psb_s_ell_cssm.o \ +psb_s_ell_cssv.o \ +psb_s_ell_get_diag.o \ +psb_s_ell_maxval.o \ +psb_s_ell_mold.o \ +psb_s_ell_print.o \ +psb_s_ell_reallocate_nz.o \ +psb_s_ell_reinit.o \ +psb_s_ell_rowsum.o \ +psb_s_ell_scal.o \ +psb_s_ell_scals.o \ +psb_s_ell_trim.o \ +psb_s_hdia_allocate_mnnz.o \ +psb_s_hdia_csmv.o \ +psb_s_hdia_mold.o \ +psb_s_hdia_print.o \ +psb_s_hll_aclsum.o \ +psb_s_hll_allocate_mnnz.o \ +psb_s_hll_arwsum.o \ +psb_s_hll_colsum.o \ +psb_s_hll_csgetblk.o \ +psb_s_hll_csgetptn.o \ +psb_s_hll_csgetrow.o \ +psb_s_hll_csmm.o \ +psb_s_hll_csmv.o \ +psb_s_hll_csnm1.o \ +psb_s_hll_csnmi.o \ +psb_s_hll_csput.o \ +psb_s_hll_cssm.o \ +psb_s_hll_cssv.o \ +psb_s_hll_get_diag.o \ +psb_s_hll_maxval.o \ +psb_s_hll_mold.o \ +psb_s_hll_print.o \ +psb_s_hll_reallocate_nz.o \ +psb_s_hll_reinit.o \ +psb_s_hll_rowsum.o \ +psb_s_hll_scal.o \ +psb_s_hll_scals.o \ +psb_s_mv_dia_from_coo.o \ +psb_s_mv_ell_from_coo.o \ +psb_s_mv_ell_from_fmt.o \ +psb_s_mv_ell_to_coo.o \ +psb_s_mv_ell_to_fmt.o \ +psb_s_mv_hdia_from_coo.o \ +psb_s_mv_hdia_to_coo.o \ +psb_s_mv_hll_from_coo.o \ +psb_s_mv_hll_from_fmt.o \ +psb_s_mv_hll_to_coo.o \ +psb_s_mv_hll_to_fmt.o \ +psb_c_cp_dia_from_coo.o \ +psb_c_cp_dia_to_coo.o \ +psb_c_cp_ell_from_coo.o \ +psb_c_cp_ell_from_fmt.o \ +psb_c_cp_ell_to_coo.o \ +psb_c_cp_ell_to_fmt.o \ +psb_c_cp_hdia_from_coo.o \ +psb_c_cp_hdia_to_coo.o \ +psb_c_cp_hll_from_coo.o \ +psb_c_cp_hll_from_fmt.o \ +psb_c_cp_hll_to_coo.o \ +psb_c_cp_hll_to_fmt.o \ +psb_c_dia_aclsum.o \ +psb_c_dia_allocate_mnnz.o \ +psb_c_dia_arwsum.o \ +psb_c_dia_colsum.o \ +psb_c_dia_csgetptn.o \ +psb_c_dia_csgetrow.o \ +psb_c_dia_csmm.o \ +psb_c_dia_csmv.o \ +psb_c_dia_get_diag.o \ +psb_c_dia_maxval.o \ +psb_c_dia_mold.o \ +psb_c_dia_print.o \ +psb_c_dia_reallocate_nz.o \ +psb_c_dia_reinit.o \ +psb_c_dia_rowsum.o \ +psb_c_dia_scal.o \ +psb_c_dia_scals.o \ +psb_c_ell_aclsum.o \ +psb_c_ell_allocate_mnnz.o \ +psb_c_ell_arwsum.o \ +psb_c_ell_colsum.o \ +psb_c_ell_csgetblk.o \ +psb_c_ell_csgetptn.o \ +psb_c_ell_csgetrow.o \ +psb_c_ell_csmm.o \ +psb_c_ell_csmv.o \ +psb_c_ell_csnm1.o \ +psb_c_ell_csnmi.o \ +psb_c_ell_csput.o \ +psb_c_ell_cssm.o \ +psb_c_ell_cssv.o \ +psb_c_ell_get_diag.o \ +psb_c_ell_maxval.o \ +psb_c_ell_mold.o \ +psb_c_ell_print.o \ +psb_c_ell_reallocate_nz.o \ +psb_c_ell_reinit.o \ +psb_c_ell_rowsum.o \ +psb_c_ell_scal.o \ +psb_c_ell_scals.o \ +psb_c_ell_trim.o \ +psb_c_hdia_allocate_mnnz.o \ +psb_c_hdia_csmv.o \ +psb_c_hdia_mold.o \ +psb_c_hdia_print.o \ +psb_c_hll_aclsum.o \ +psb_c_hll_allocate_mnnz.o \ +psb_c_hll_arwsum.o \ +psb_c_hll_colsum.o \ +psb_c_hll_csgetblk.o \ +psb_c_hll_csgetptn.o \ +psb_c_hll_csgetrow.o \ +psb_c_hll_csmm.o \ +psb_c_hll_csmv.o \ +psb_c_hll_csnm1.o \ +psb_c_hll_csnmi.o \ +psb_c_hll_csput.o \ +psb_c_hll_cssm.o \ +psb_c_hll_cssv.o \ +psb_c_hll_get_diag.o \ +psb_c_hll_maxval.o \ +psb_c_hll_mold.o \ +psb_c_hll_print.o \ +psb_c_hll_reallocate_nz.o \ +psb_c_hll_reinit.o \ +psb_c_hll_rowsum.o \ +psb_c_hll_scal.o \ +psb_c_hll_scals.o \ +psb_c_mv_dia_from_coo.o \ +psb_c_mv_ell_from_coo.o \ +psb_c_mv_ell_from_fmt.o \ +psb_c_mv_ell_to_coo.o \ +psb_c_mv_ell_to_fmt.o \ +psb_c_mv_hdia_from_coo.o \ +psb_c_mv_hdia_to_coo.o \ +psb_c_mv_hll_from_coo.o \ +psb_c_mv_hll_from_fmt.o \ +psb_c_mv_hll_to_coo.o \ +psb_c_mv_hll_to_fmt.o \ +psb_d_cp_dia_from_coo.o \ +psb_d_cp_dia_to_coo.o \ +psb_d_cp_ell_from_coo.o \ +psb_d_cp_ell_from_fmt.o \ +psb_d_cp_ell_to_coo.o \ +psb_d_cp_ell_to_fmt.o \ +psb_d_cp_hdia_from_coo.o \ +psb_d_cp_hdia_to_coo.o \ +psb_d_cp_hll_from_coo.o \ +psb_d_cp_hll_from_fmt.o \ +psb_d_cp_hll_to_coo.o \ +psb_d_cp_hll_to_fmt.o \ +psb_d_dia_aclsum.o \ +psb_d_dia_allocate_mnnz.o \ +psb_d_dia_arwsum.o \ +psb_d_dia_colsum.o \ +psb_d_dia_csgetptn.o \ +psb_d_dia_csgetrow.o \ +psb_d_dia_csmm.o \ +psb_d_dia_csmv.o \ +psb_d_dia_get_diag.o \ +psb_d_dia_maxval.o \ +psb_d_dia_mold.o \ +psb_d_dia_print.o \ +psb_d_dia_reallocate_nz.o \ +psb_d_dia_reinit.o \ +psb_d_dia_rowsum.o \ +psb_d_dia_scal.o \ +psb_d_dia_scals.o \ +psb_d_ell_aclsum.o \ +psb_d_ell_allocate_mnnz.o \ +psb_d_ell_arwsum.o \ +psb_d_ell_colsum.o \ +psb_d_ell_csgetblk.o \ +psb_d_ell_csgetptn.o \ +psb_d_ell_csgetrow.o \ +psb_d_ell_csmm.o \ +psb_d_ell_csmv.o \ +psb_d_ell_csnm1.o \ +psb_d_ell_csnmi.o \ +psb_d_ell_csput.o \ +psb_d_ell_cssm.o \ +psb_d_ell_cssv.o \ +psb_d_ell_get_diag.o \ +psb_d_ell_maxval.o \ +psb_d_ell_mold.o \ +psb_d_ell_print.o \ +psb_d_ell_reallocate_nz.o \ +psb_d_ell_reinit.o \ +psb_d_ell_rowsum.o \ +psb_d_ell_scal.o \ +psb_d_ell_scals.o \ +psb_d_ell_trim.o \ +psb_d_hdia_allocate_mnnz.o \ +psb_d_hdia_csmv.o \ +psb_d_hdia_mold.o \ +psb_d_hdia_print.o \ +psb_d_hll_aclsum.o \ +psb_d_hll_allocate_mnnz.o \ +psb_d_hll_arwsum.o \ +psb_d_hll_colsum.o \ +psb_d_hll_csgetblk.o \ +psb_d_hll_csgetptn.o \ +psb_d_hll_csgetrow.o \ +psb_d_hll_csmm.o \ +psb_d_hll_csmv.o \ +psb_d_hll_csnm1.o \ +psb_d_hll_csnmi.o \ +psb_d_hll_csput.o \ +psb_d_hll_cssm.o \ +psb_d_hll_cssv.o \ +psb_d_hll_get_diag.o \ +psb_d_hll_maxval.o \ +psb_d_hll_mold.o \ +psb_d_hll_print.o \ +psb_d_hll_reallocate_nz.o \ +psb_d_hll_reinit.o \ +psb_d_hll_rowsum.o \ +psb_d_hll_scal.o \ +psb_d_hll_scals.o \ +psb_d_mv_dia_from_coo.o \ +psb_d_mv_ell_from_coo.o \ +psb_d_mv_ell_from_fmt.o \ +psb_d_mv_ell_to_coo.o \ +psb_d_mv_ell_to_fmt.o \ +psb_d_mv_hdia_from_coo.o \ +psb_d_mv_hdia_to_coo.o \ +psb_d_mv_hll_from_coo.o \ +psb_d_mv_hll_from_fmt.o \ +psb_d_mv_hll_to_coo.o \ +psb_d_mv_hll_to_fmt.o \ +psb_z_cp_dia_from_coo.o \ +psb_z_cp_dia_to_coo.o \ +psb_z_cp_ell_from_coo.o \ +psb_z_cp_ell_from_fmt.o \ +psb_z_cp_ell_to_coo.o \ +psb_z_cp_ell_to_fmt.o \ +psb_z_cp_hdia_from_coo.o \ +psb_z_cp_hdia_to_coo.o \ +psb_z_cp_hll_from_coo.o \ +psb_z_cp_hll_from_fmt.o \ +psb_z_cp_hll_to_coo.o \ +psb_z_cp_hll_to_fmt.o \ +psb_z_dia_aclsum.o \ +psb_z_dia_allocate_mnnz.o \ +psb_z_dia_arwsum.o \ +psb_z_dia_colsum.o \ +psb_z_dia_csgetptn.o \ +psb_z_dia_csgetrow.o \ +psb_z_dia_csmm.o \ +psb_z_dia_csmv.o \ +psb_z_dia_get_diag.o \ +psb_z_dia_maxval.o \ +psb_z_dia_mold.o \ +psb_z_dia_print.o \ +psb_z_dia_reallocate_nz.o \ +psb_z_dia_reinit.o \ +psb_z_dia_rowsum.o \ +psb_z_dia_scal.o \ +psb_z_dia_scals.o \ +psb_z_ell_aclsum.o \ +psb_z_ell_allocate_mnnz.o \ +psb_z_ell_arwsum.o \ +psb_z_ell_colsum.o \ +psb_z_ell_csgetblk.o \ +psb_z_ell_csgetptn.o \ +psb_z_ell_csgetrow.o \ +psb_z_ell_csmm.o \ +psb_z_ell_csmv.o \ +psb_z_ell_csnm1.o \ +psb_z_ell_csnmi.o \ +psb_z_ell_csput.o \ +psb_z_ell_cssm.o \ +psb_z_ell_cssv.o \ +psb_z_ell_get_diag.o \ +psb_z_ell_maxval.o \ +psb_z_ell_mold.o \ +psb_z_ell_print.o \ +psb_z_ell_reallocate_nz.o \ +psb_z_ell_reinit.o \ +psb_z_ell_rowsum.o \ +psb_z_ell_scal.o \ +psb_z_ell_scals.o \ +psb_z_ell_trim.o \ +psb_z_hdia_allocate_mnnz.o \ +psb_z_hdia_csmv.o \ +psb_z_hdia_mold.o \ +psb_z_hdia_print.o \ +psb_z_hll_aclsum.o \ +psb_z_hll_allocate_mnnz.o \ +psb_z_hll_arwsum.o \ +psb_z_hll_colsum.o \ +psb_z_hll_csgetblk.o \ +psb_z_hll_csgetptn.o \ +psb_z_hll_csgetrow.o \ +psb_z_hll_csmm.o \ +psb_z_hll_csmv.o \ +psb_z_hll_csnm1.o \ +psb_z_hll_csnmi.o \ +psb_z_hll_csput.o \ +psb_z_hll_cssm.o \ +psb_z_hll_cssv.o \ +psb_z_hll_get_diag.o \ +psb_z_hll_maxval.o \ +psb_z_hll_mold.o \ +psb_z_hll_print.o \ +psb_z_hll_reallocate_nz.o \ +psb_z_hll_reinit.o \ +psb_z_hll_rowsum.o \ +psb_z_hll_scal.o \ +psb_z_hll_scals.o \ +psb_z_mv_dia_from_coo.o \ +psb_z_mv_ell_from_coo.o \ +psb_z_mv_ell_from_fmt.o \ +psb_z_mv_ell_to_coo.o \ +psb_z_mv_ell_to_fmt.o \ +psb_z_mv_hdia_from_coo.o \ +psb_z_mv_hdia_to_coo.o \ +psb_z_mv_hll_from_coo.o \ +psb_z_mv_hll_from_fmt.o \ +psb_z_mv_hll_to_coo.o \ +psb_z_mv_hll_to_fmt.o \ +psi_s_xtr_ell_from_coo.o \ +psi_c_xtr_ell_from_coo.o \ +psi_d_xtr_ell_from_coo.o \ +psi_z_xtr_ell_from_coo.o \ +psi_s_convert_ell_from_coo.o \ +psi_c_convert_ell_from_coo.o \ +psi_d_convert_ell_from_coo.o \ +psi_z_convert_ell_from_coo.o \ +psi_s_convert_hll_from_coo.o \ +psi_c_convert_hll_from_coo.o \ +psi_d_convert_hll_from_coo.o \ +psi_z_convert_hll_from_coo.o \ +psi_s_xtr_dia_from_coo.o \ +psi_c_xtr_dia_from_coo.o \ +psi_d_xtr_dia_from_coo.o \ +psi_z_xtr_dia_from_coo.o \ +psi_s_xtr_coo_from_dia.o \ +psi_d_xtr_coo_from_dia.o \ +psi_c_xtr_coo_from_dia.o \ +psi_z_xtr_coo_from_dia.o \ +psi_s_convert_dia_from_coo.o \ +psi_c_convert_dia_from_coo.o \ +psi_d_convert_dia_from_coo.o \ +psi_z_convert_dia_from_coo.o \ +psb_s_dns_mat_impl.o \ +psb_d_dns_mat_impl.o \ +psb_c_dns_mat_impl.o \ +psb_z_dns_mat_impl.o + +objs: $(OBJS) + +lib: objs + ar cur ../$(LIBNAME) $(OBJS) + +clean: + /bin/rm -f $(OBJS) diff --git a/ext/impl/psb_c_cp_dia_from_coo.f90 b/ext/impl/psb_c_cp_dia_from_coo.f90 new file mode 100644 index 00000000..2d2b1caa --- /dev/null +++ b/ext/impl/psb_c_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_cp_dia_from_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_dia_from_coo diff --git a/ext/impl/psb_c_cp_dia_to_coo.f90 b/ext/impl/psb_c_cp_dia_to_coo.f90 new file mode 100644 index 00000000..9975bec0 --- /dev/null +++ b/ext/impl/psb_c_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_cp_dia_to_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + call psi_c_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_c_cp_dia_to_coo diff --git a/ext/impl/psb_c_cp_ell_from_coo.f90 b/ext/impl/psb_c_cp_ell_from_coo.f90 new file mode 100644 index 00000000..28d7d242 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_c_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_c_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_c_cp_ell_from_coo diff --git a/ext/impl/psb_c_cp_ell_from_fmt.f90 b/ext/impl/psb_c_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..309063b9 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_from_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_c_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_c_cp_ell_from_fmt diff --git a/ext/impl/psb_c_cp_ell_to_coo.f90 b/ext/impl/psb_c_cp_ell_to_coo.f90 new file mode 100644 index 00000000..ec6bcff5 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_to_coo + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_c_cp_ell_to_coo diff --git a/ext/impl/psb_c_cp_ell_to_fmt.f90 b/ext/impl/psb_c_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..0c6a6903 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_to_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_c_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cp_ell_to_fmt diff --git a/ext/impl/psb_c_cp_hdia_from_coo.f90 b/ext/impl/psb_c_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..a9e1ca21 --- /dev/null +++ b/ext/impl/psb_c_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_cp_hdia_from_coo + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_c_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_c_cp_hdia_from_coo diff --git a/ext/impl/psb_c_cp_hdia_to_coo.f90 b/ext/impl/psb_c_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..32801653 --- /dev/null +++ b/ext/impl/psb_c_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_c_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_c_cp_hdia_to_coo diff --git a/ext/impl/psb_c_cp_hll_from_coo.f90 b/ext/impl/psb_c_cp_hll_from_coo.f90 new file mode 100644 index 00000000..506196c2 --- /dev/null +++ b/ext/impl/psb_c_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_from_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_hll_from_coo diff --git a/ext/impl/psb_c_cp_hll_from_fmt.f90 b/ext/impl/psb_c_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..0849561f --- /dev/null +++ b/ext/impl/psb_c_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_from_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_c_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_c_cp_hll_from_fmt diff --git a/ext/impl/psb_c_cp_hll_to_coo.f90 b/ext/impl/psb_c_cp_hll_to_coo.f90 new file mode 100644 index 00000000..0ff46352 --- /dev/null +++ b/ext/impl/psb_c_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_to_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_c_cp_hll_to_coo diff --git a/ext/impl/psb_c_cp_hll_to_fmt.f90 b/ext/impl/psb_c_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..df8fa3b7 --- /dev/null +++ b/ext/impl/psb_c_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_to_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_c_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cp_hll_to_fmt diff --git a/ext/impl/psb_c_dia_aclsum.f90 b/ext/impl/psb_c_dia_aclsum.f90 new file mode 100644 index 00000000..4bd8d440 --- /dev/null +++ b/ext/impl/psb_c_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_aclsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_aclsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_aclsum diff --git a/ext/impl/psb_c_dia_allocate_mnnz.f90 b/ext/impl/psb_c_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..37fb34e1 --- /dev/null +++ b/ext/impl/psb_c_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_allocate_mnnz diff --git a/ext/impl/psb_c_dia_arwsum.f90 b/ext/impl/psb_c_dia_arwsum.f90 new file mode 100644 index 00000000..fe40deb8 --- /dev/null +++ b/ext/impl/psb_c_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_arwsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_arwsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_arwsum diff --git a/ext/impl/psb_c_dia_colsum.f90 b/ext/impl/psb_c_dia_colsum.f90 new file mode 100644 index 00000000..ed43fa12 --- /dev/null +++ b/ext/impl/psb_c_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_colsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_colsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = cone + else + d = czero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_colsum diff --git a/ext/impl/psb_c_dia_csgetptn.f90 b/ext/impl/psb_c_dia_csgetptn.f90 new file mode 100644 index 00000000..ad479d35 --- /dev/null +++ b/ext/impl/psb_c_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csgetptn + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_c_dia_csgetptn diff --git a/ext/impl/psb_c_dia_csgetrow.f90 b/ext/impl/psb_c_dia_csgetrow.f90 new file mode 100644 index 00000000..2989b20f --- /dev/null +++ b/ext/impl/psb_c_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csgetrow + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_c_dia_csgetrow diff --git a/ext/impl/psb_c_dia_csmm.f90 b/ext/impl/psb_c_dia_csmm.f90 new file mode 100644 index 00000000..b65c4651 --- /dev/null +++ b/ext/impl/psb_c_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csmm + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_c_dia_csmm_inner + +end subroutine psb_c_dia_csmm diff --git a/ext/impl/psb_c_dia_csmv.f90 b/ext/impl/psb_c_dia_csmv.f90 new file mode 100644 index 00000000..cf1ef677 --- /dev/null +++ b/ext/impl/psb_c_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csmv + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_c_dia_csmv_inner + +end subroutine psb_c_dia_csmv diff --git a/ext/impl/psb_c_dia_get_diag.f90 b/ext/impl/psb_c_dia_get_diag.f90 new file mode 100644 index 00000000..d868b62d --- /dev/null +++ b/ext/impl/psb_c_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_get_diag + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = cone + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_get_diag diff --git a/ext/impl/psb_c_dia_maxval.f90 b/ext/impl/psb_c_dia_maxval.f90 new file mode 100644 index 00000000..03a2be82 --- /dev/null +++ b/ext/impl/psb_c_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_c_dia_maxval(a) result(res) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_maxval + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_c_dia_maxval diff --git a/ext/impl/psb_c_dia_mold.f90 b/ext/impl/psb_c_dia_mold.f90 new file mode 100644 index 00000000..1d694828 --- /dev/null +++ b/ext/impl/psb_c_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_mold(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_mold + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_mold diff --git a/ext/impl/psb_c_dia_print.f90 b/ext/impl/psb_c_dia_print.f90 new file mode 100644 index 00000000..f3233366 --- /dev/null +++ b/ext/impl/psb_c_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dia_print' + logical, parameter :: debug=.false. + + class(psb_c_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_c_dia_print diff --git a/ext/impl/psb_c_dia_reallocate_nz.f90 b/ext/impl/psb_c_dia_reallocate_nz.f90 new file mode 100644 index 00000000..c46cd465 --- /dev/null +++ b/ext/impl/psb_c_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_reallocate_nz diff --git a/ext/impl/psb_c_dia_reinit.f90 b/ext/impl/psb_c_dia_reinit.f90 new file mode 100644 index 00000000..04a345eb --- /dev/null +++ b/ext/impl/psb_c_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_reinit(a,clear) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_reinit + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = czero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_reinit diff --git a/ext/impl/psb_c_dia_rowsum.f90 b/ext/impl/psb_c_dia_rowsum.f90 new file mode 100644 index 00000000..1f36dab4 --- /dev/null +++ b/ext/impl/psb_c_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_rowsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_rowsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_rowsum diff --git a/ext/impl/psb_c_dia_scal.f90 b/ext/impl/psb_c_dia_scal.f90 new file mode 100644 index 00000000..8f35b7c1 --- /dev/null +++ b/ext/impl/psb_c_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_scal + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_scal diff --git a/ext/impl/psb_c_dia_scals.f90 b/ext/impl/psb_c_dia_scals.f90 new file mode 100644 index 00000000..a9ca5db1 --- /dev/null +++ b/ext/impl/psb_c_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_scals(d,a,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_scals + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_scals diff --git a/ext/impl/psb_c_dns_mat_impl.f90 b/ext/impl/psb_c_dns_mat_impl.f90 new file mode 100644 index 00000000..8e99af8b --- /dev/null +++ b/ext/impl/psb_c_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_c_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_c_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csmv + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='c_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call cgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_csmv + + +!> Function csmm: +!! \memberof psb_c_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_c_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csmm + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='c_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call cgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_c_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_c_dns_csnmi(a) result(res) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csnmi + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_spk_) :: acc + + res = szero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_c_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_c_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_c_dns_get_diag(a,d,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_get_diag + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = czero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_c_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_c_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_c_dns_sparse_mat +!! \brief Allocate a class(psb_c_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_c_dns_mold(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_mold + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_c_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_c_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_c_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = czero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_c_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_c_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csgetrow + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_c_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_c_dns_trim(a) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_trim + implicit none + class(psb_c_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Copy and convert from psb_c_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_c_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_cp_dns_from_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = czero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = czero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Copy and convert to psb_c_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_c_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_cp_dns_to_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= czero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_c_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Convert to psb_c_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_c_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_mv_dns_to_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_c_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Convert from psb_c_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_c_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_mv_dns_from_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_mv_dns_from_coo + diff --git a/ext/impl/psb_c_ell_aclsum.f90 b/ext/impl/psb_c_ell_aclsum.f90 new file mode 100644 index 00000000..3d5a292a --- /dev/null +++ b/ext/impl/psb_c_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_aclsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_aclsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_aclsum diff --git a/ext/impl/psb_c_ell_allocate_mnnz.f90 b/ext/impl/psb_c_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..b137eb04 --- /dev/null +++ b/ext/impl/psb_c_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_allocate_mnnz diff --git a/ext/impl/psb_c_ell_arwsum.f90 b/ext/impl/psb_c_ell_arwsum.f90 new file mode 100644 index 00000000..c047c742 --- /dev/null +++ b/ext/impl/psb_c_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_arwsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_arwsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = sone + else + d(i) = szero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_arwsum diff --git a/ext/impl/psb_c_ell_colsum.f90 b/ext/impl/psb_c_ell_colsum.f90 new file mode 100644 index 00000000..6d06b589 --- /dev/null +++ b/ext/impl/psb_c_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_colsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_colsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = cone + else + d = czero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_colsum diff --git a/ext/impl/psb_c_ell_csgetblk.f90 b/ext/impl/psb_c_ell_csgetblk.f90 new file mode 100644 index 00000000..deb07c25 --- /dev/null +++ b/ext/impl/psb_c_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csgetblk + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_csgetblk diff --git a/ext/impl/psb_c_ell_csgetptn.f90 b/ext/impl/psb_c_ell_csgetptn.f90 new file mode 100644 index 00000000..821daa89 --- /dev/null +++ b/ext/impl/psb_c_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csgetptn + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_ell_csgetrow + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_ell_csmm + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ell_csmv + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ell_csnm1 + + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_spk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = sone + else + vt(:) = szero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_c_ell_csnm1 diff --git a/ext/impl/psb_c_ell_csnmi.f90 b/ext/impl/psb_c_ell_csnmi.f90 new file mode 100644 index 00000000..6dc9cfa4 --- /dev/null +++ b/ext/impl/psb_c_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csnmi + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + sone + res = max(res,acc) + end do + +end function psb_c_ell_csnmi diff --git a/ext/impl/psb_c_ell_csput.f90 b/ext/impl/psb_c_ell_csput.f90 new file mode 100644 index 00000000..e0b0f47f --- /dev/null +++ b/ext/impl/psb_c_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csput_a + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_c_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_c_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='c_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_c_ell_srch_upd +end subroutine psb_c_ell_csput_a diff --git a/ext/impl/psb_c_ell_cssm.f90 b/ext/impl/psb_c_ell_cssm.f90 new file mode 100644 index 00000000..26e76030 --- /dev/null +++ b/ext/impl/psb_c_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_cssm + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_c_ell_cssv + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + complex(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_c_ell_get_diag + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = cone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = czero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_get_diag diff --git a/ext/impl/psb_c_ell_maxval.f90 b/ext/impl/psb_c_ell_maxval.f90 new file mode 100644 index 00000000..4de58b11 --- /dev/null +++ b/ext/impl/psb_c_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_ell_maxval(a) result(res) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_maxval + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_c_ell_maxval diff --git a/ext/impl/psb_c_ell_mold.f90 b/ext/impl/psb_c_ell_mold.f90 new file mode 100644 index 00000000..c7c5d621 --- /dev/null +++ b/ext/impl/psb_c_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_mold(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_mold + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_mold diff --git a/ext/impl/psb_c_ell_print.f90 b/ext/impl/psb_c_ell_print.f90 new file mode 100644 index 00000000..1b8117a8 --- /dev/null +++ b/ext/impl/psb_c_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_c_ell_print diff --git a/ext/impl/psb_c_ell_reallocate_nz.f90 b/ext/impl/psb_c_ell_reallocate_nz.f90 new file mode 100644 index 00000000..b0d77568 --- /dev/null +++ b/ext/impl/psb_c_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_reallocate_nz diff --git a/ext/impl/psb_c_ell_reinit.f90 b/ext/impl/psb_c_ell_reinit.f90 new file mode 100644 index 00000000..2b15dfea --- /dev/null +++ b/ext/impl/psb_c_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_reinit(a,clear) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_reinit + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = czero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_reinit diff --git a/ext/impl/psb_c_ell_rowsum.f90 b/ext/impl/psb_c_ell_rowsum.f90 new file mode 100644 index 00000000..5ae7d42c --- /dev/null +++ b/ext/impl/psb_c_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_rowsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_rowsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = cone + else + d(i) = czero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_rowsum diff --git a/ext/impl/psb_c_ell_scal.f90 b/ext/impl/psb_c_ell_scal.f90 new file mode 100644 index 00000000..63150f32 --- /dev/null +++ b/ext/impl/psb_c_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_scal + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_scal diff --git a/ext/impl/psb_c_ell_scals.f90 b/ext/impl/psb_c_ell_scals.f90 new file mode 100644 index 00000000..3e4cd92a --- /dev/null +++ b/ext/impl/psb_c_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_scals(d,a,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_scals + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_scals diff --git a/ext/impl/psb_c_ell_trim.f90 b/ext/impl/psb_c_ell_trim.f90 new file mode 100644 index 00000000..22aafefd --- /dev/null +++ b/ext/impl/psb_c_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_trim(a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_trim + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_trim diff --git a/ext/impl/psb_c_hdia_allocate_mnnz.f90 b/ext/impl/psb_c_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..17a49ffe --- /dev/null +++ b/ext/impl/psb_c_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hdia_allocate_mnnz diff --git a/ext/impl/psb_c_hdia_csmv.f90 b/ext/impl/psb_c_hdia_csmv.f90 new file mode 100644 index 00000000..a04fde07 --- /dev/null +++ b/ext/impl/psb_c_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_csmv + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_c_inner_dia_csmv + +end subroutine psb_c_hdia_csmv diff --git a/ext/impl/psb_c_hdia_mold.f90 b/ext/impl/psb_c_hdia_mold.f90 new file mode 100644 index 00000000..d9f85ec9 --- /dev/null +++ b/ext/impl/psb_c_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hdia_mold(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_mold + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hdia_mold diff --git a/ext/impl/psb_c_hdia_print.f90 b/ext/impl/psb_c_hdia_print.f90 new file mode 100644 index 00000000..477a5433 --- /dev/null +++ b/ext/impl/psb_c_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_c_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + complex(psb_spk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_c_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_c_hdia_print diff --git a/ext/impl/psb_c_hll_aclsum.f90 b/ext/impl/psb_c_hll_aclsum.f90 new file mode 100644 index 00000000..f1bd8e89 --- /dev/null +++ b/ext/impl/psb_c_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_aclsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_aclsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine c_hll_aclsum + +end subroutine psb_c_hll_aclsum diff --git a/ext/impl/psb_c_hll_allocate_mnnz.f90 b/ext/impl/psb_c_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..97b996bd --- /dev/null +++ b/ext/impl/psb_c_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_allocate_mnnz diff --git a/ext/impl/psb_c_hll_arwsum.f90 b/ext/impl/psb_c_hll_arwsum.f90 new file mode 100644 index 00000000..9c48e1c0 --- /dev/null +++ b/ext/impl/psb_c_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_arwsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_arwsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine c_hll_arwsum + +end subroutine psb_c_hll_arwsum diff --git a/ext/impl/psb_c_hll_colsum.f90 b/ext/impl/psb_c_hll_colsum.f90 new file mode 100644 index 00000000..fbcb0934 --- /dev/null +++ b/ext/impl/psb_c_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_colsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_colsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = cone + else + d = czero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine c_hll_colsum + +end subroutine psb_c_hll_colsum diff --git a/ext/impl/psb_c_hll_csgetblk.f90 b/ext/impl/psb_c_hll_csgetblk.f90 new file mode 100644 index 00000000..9bf0b869 --- /dev/null +++ b/ext/impl/psb_c_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csgetblk + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_csgetblk diff --git a/ext/impl/psb_c_hll_csgetptn.f90 b/ext/impl/psb_c_hll_csgetptn.f90 new file mode 100644 index 00000000..0f6481ed --- /dev/null +++ b/ext/impl/psb_c_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csgetptn + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_hll_csgetrow + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_hll_csmm + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + complex(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_c_hll_csmv + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_c_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == cone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == cone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*conjg(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == czero) then + if (beta == czero) then + do i=1,m + y(ir+i-1) = czero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == czero) then + do i=1,m + tmp = czero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = czero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_c_hll_csmv_inner + + subroutine psb_c_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == czero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_8 + + subroutine psb_c_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == czero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_24 + + subroutine psb_c_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == czero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_16 + + subroutine psb_c_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == czero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_32 + + subroutine psb_c_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == czero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_4 + +end subroutine psb_c_hll_csmv diff --git a/ext/impl/psb_c_hll_csnm1.f90 b/ext/impl/psb_c_hll_csnm1.f90 new file mode 100644 index 00000000..25daa75d --- /dev/null +++ b/ext/impl/psb_c_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csnm1 + + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_spk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = szero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = sone + else + vt = szero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_c_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_c_hll_csnm1_inner + +end function psb_c_hll_csnm1 diff --git a/ext/impl/psb_c_hll_csnmi.f90 b/ext/impl/psb_c_hll_csnmi.f90 new file mode 100644 index 00000000..c70be9ce --- /dev/null +++ b/ext/impl/psb_c_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csnmi + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='c_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = szero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_c_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = sone + else + tmp = szero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_c_hll_csnmi_inner + +end function psb_c_hll_csnmi diff --git a/ext/impl/psb_c_hll_csput.f90 b/ext/impl/psb_c_hll_csput.f90 new file mode 100644 index 00000000..e46ae30a --- /dev/null +++ b/ext/impl/psb_c_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csput_a + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_c_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='c_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_c_hll_srch_upd + +end subroutine psb_c_hll_csput_a diff --git a/ext/impl/psb_c_hll_cssm.f90 b/ext/impl/psb_c_hll_cssm.f90 new file mode 100644 index 00000000..90e3b978 --- /dev/null +++ b/ext/impl/psb_c_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_cssm + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + complex(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_c_hll_cssv + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + complex(psb_spk_) :: acc + complex(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_c_hll_get_diag + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = cone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_c_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = czero + end if + end do + + end subroutine psb_c_hll_get_diag_inner + +end subroutine psb_c_hll_get_diag diff --git a/ext/impl/psb_c_hll_maxval.f90 b/ext/impl/psb_c_hll_maxval.f90 new file mode 100644 index 00000000..ff82bb40 --- /dev/null +++ b/ext/impl/psb_c_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_hll_maxval(a) result(res) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_maxval + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,sone) + +end function psb_c_hll_maxval diff --git a/ext/impl/psb_c_hll_mold.f90 b/ext/impl/psb_c_hll_mold.f90 new file mode 100644 index 00000000..4a6204b0 --- /dev/null +++ b/ext/impl/psb_c_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_mold(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_mold + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_mold diff --git a/ext/impl/psb_c_hll_print.f90 b/ext/impl/psb_c_hll_print.f90 new file mode 100644 index 00000000..a5eec378 --- /dev/null +++ b/ext/impl/psb_c_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_c_hll_print diff --git a/ext/impl/psb_c_hll_reallocate_nz.f90 b/ext/impl/psb_c_hll_reallocate_nz.f90 new file mode 100644 index 00000000..44d9cfc9 --- /dev/null +++ b/ext/impl/psb_c_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_reallocate_nz diff --git a/ext/impl/psb_c_hll_reinit.f90 b/ext/impl/psb_c_hll_reinit.f90 new file mode 100644 index 00000000..82d5cb16 --- /dev/null +++ b/ext/impl/psb_c_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_reinit(a,clear) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_reinit + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = czero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_reinit diff --git a/ext/impl/psb_c_hll_rowsum.f90 b/ext/impl/psb_c_hll_rowsum.f90 new file mode 100644 index 00000000..e6eea227 --- /dev/null +++ b/ext/impl/psb_c_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_rowsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_rowsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = cone + else + d = czero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine c_hll_rowsum + +end subroutine psb_c_hll_rowsum diff --git a/ext/impl/psb_c_hll_scal.f90 b/ext/impl/psb_c_hll_scal.f90 new file mode 100644 index 00000000..0fd59f15 --- /dev/null +++ b/ext/impl/psb_c_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_scal + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_c_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: d(*) + complex(psb_spk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_c_hll_scal_inner + + +end subroutine psb_c_hll_scal diff --git a/ext/impl/psb_c_hll_scals.f90 b/ext/impl/psb_c_hll_scals.f90 new file mode 100644 index 00000000..13a03a22 --- /dev/null +++ b/ext/impl/psb_c_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_scals(d,a,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_scals + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_scals diff --git a/ext/impl/psb_c_mv_dia_from_coo.f90 b/ext/impl/psb_c_mv_dia_from_coo.f90 new file mode 100644 index 00000000..99871348 --- /dev/null +++ b/ext/impl/psb_c_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_mv_dia_from_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_dia_from_coo diff --git a/ext/impl/psb_c_mv_dia_to_coo.f90 b/ext/impl/psb_c_mv_dia_to_coo.f90 new file mode 100644 index 00000000..1382cec3 --- /dev/null +++ b/ext/impl/psb_c_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_mv_dia_to_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_dia_to_coo diff --git a/ext/impl/psb_c_mv_ell_from_coo.f90 b/ext/impl/psb_c_mv_ell_from_coo.f90 new file mode 100644 index 00000000..64da3e8d --- /dev/null +++ b/ext/impl/psb_c_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_from_coo + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_mv_ell_from_coo diff --git a/ext/impl/psb_c_mv_ell_from_fmt.f90 b/ext/impl/psb_c_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..d0fa9bc4 --- /dev/null +++ b/ext/impl/psb_c_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_from_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_c_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_ell_from_fmt diff --git a/ext/impl/psb_c_mv_ell_to_coo.f90 b/ext/impl/psb_c_mv_ell_to_coo.f90 new file mode 100644 index 00000000..a49e2e3c --- /dev/null +++ b/ext/impl/psb_c_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_to_coo + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_ell_to_coo diff --git a/ext/impl/psb_c_mv_ell_to_fmt.f90 b/ext/impl/psb_c_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..3ea02d6b --- /dev/null +++ b/ext/impl/psb_c_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_to_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_c_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_ell_to_fmt diff --git a/ext/impl/psb_c_mv_hdia_from_coo.f90 b/ext/impl/psb_c_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..4247fdf8 --- /dev/null +++ b/ext/impl/psb_c_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_mv_hdia_from_coo + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hdia_from_coo diff --git a/ext/impl/psb_c_mv_hdia_to_coo.f90 b/ext/impl/psb_c_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..3a91917a --- /dev/null +++ b/ext/impl/psb_c_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_mv_hdia_to_coo + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_hdia_to_coo diff --git a/ext/impl/psb_c_mv_hll_from_coo.f90 b/ext/impl/psb_c_mv_hll_from_coo.f90 new file mode 100644 index 00000000..b78bdd80 --- /dev/null +++ b/ext/impl/psb_c_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_from_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hll_from_coo diff --git a/ext/impl/psb_c_mv_hll_from_fmt.f90 b/ext/impl/psb_c_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..add90355 --- /dev/null +++ b/ext/impl/psb_c_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_from_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_c_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_hll_from_fmt diff --git a/ext/impl/psb_c_mv_hll_to_coo.f90 b/ext/impl/psb_c_mv_hll_to_coo.f90 new file mode 100644 index 00000000..fbf5dfcd --- /dev/null +++ b/ext/impl/psb_c_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_to_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_hll_to_coo diff --git a/ext/impl/psb_c_mv_hll_to_fmt.f90 b/ext/impl/psb_c_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..37d77e85 --- /dev/null +++ b/ext/impl/psb_c_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_to_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_c_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_hll_to_fmt diff --git a/ext/impl/psb_d_cp_dia_from_coo.f90 b/ext/impl/psb_d_cp_dia_from_coo.f90 new file mode 100644 index 00000000..b640565f --- /dev/null +++ b/ext/impl/psb_d_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_cp_dia_from_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_dia_from_coo diff --git a/ext/impl/psb_d_cp_dia_to_coo.f90 b/ext/impl/psb_d_cp_dia_to_coo.f90 new file mode 100644 index 00000000..527c96d0 --- /dev/null +++ b/ext/impl/psb_d_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_cp_dia_to_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + call psi_d_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_d_cp_dia_to_coo diff --git a/ext/impl/psb_d_cp_ell_from_coo.f90 b/ext/impl/psb_d_cp_ell_from_coo.f90 new file mode 100644 index 00000000..cf23a0e0 --- /dev/null +++ b/ext/impl/psb_d_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_d_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_d_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_d_cp_ell_from_coo diff --git a/ext/impl/psb_d_cp_ell_from_fmt.f90 b/ext/impl/psb_d_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..ce8a8d7e --- /dev/null +++ b/ext/impl/psb_d_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_from_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_d_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_d_cp_ell_from_fmt diff --git a/ext/impl/psb_d_cp_ell_to_coo.f90 b/ext/impl/psb_d_cp_ell_to_coo.f90 new file mode 100644 index 00000000..8e7ad735 --- /dev/null +++ b/ext/impl/psb_d_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_to_coo + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_d_cp_ell_to_coo diff --git a/ext/impl/psb_d_cp_ell_to_fmt.f90 b/ext/impl/psb_d_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..fd05d0fd --- /dev/null +++ b/ext/impl/psb_d_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_to_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_d_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cp_ell_to_fmt diff --git a/ext/impl/psb_d_cp_hdia_from_coo.f90 b/ext/impl/psb_d_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..bbc34195 --- /dev/null +++ b/ext/impl/psb_d_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_cp_hdia_from_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_d_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_d_cp_hdia_from_coo diff --git a/ext/impl/psb_d_cp_hdia_to_coo.f90 b/ext/impl/psb_d_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..bfa77b08 --- /dev/null +++ b/ext/impl/psb_d_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_d_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_d_cp_hdia_to_coo diff --git a/ext/impl/psb_d_cp_hll_from_coo.f90 b/ext/impl/psb_d_cp_hll_from_coo.f90 new file mode 100644 index 00000000..03028d20 --- /dev/null +++ b/ext/impl/psb_d_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_from_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_hll_from_coo diff --git a/ext/impl/psb_d_cp_hll_from_fmt.f90 b/ext/impl/psb_d_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..785b23ac --- /dev/null +++ b/ext/impl/psb_d_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_from_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_d_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_d_cp_hll_from_fmt diff --git a/ext/impl/psb_d_cp_hll_to_coo.f90 b/ext/impl/psb_d_cp_hll_to_coo.f90 new file mode 100644 index 00000000..b20144c5 --- /dev/null +++ b/ext/impl/psb_d_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_to_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_d_cp_hll_to_coo diff --git a/ext/impl/psb_d_cp_hll_to_fmt.f90 b/ext/impl/psb_d_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..6c60c5b5 --- /dev/null +++ b/ext/impl/psb_d_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_to_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_d_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cp_hll_to_fmt diff --git a/ext/impl/psb_d_dia_aclsum.f90 b/ext/impl/psb_d_dia_aclsum.f90 new file mode 100644 index 00000000..0f4df6ca --- /dev/null +++ b/ext/impl/psb_d_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_aclsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_aclsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_aclsum diff --git a/ext/impl/psb_d_dia_allocate_mnnz.f90 b/ext/impl/psb_d_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..309b7d4a --- /dev/null +++ b/ext/impl/psb_d_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_allocate_mnnz diff --git a/ext/impl/psb_d_dia_arwsum.f90 b/ext/impl/psb_d_dia_arwsum.f90 new file mode 100644 index 00000000..98eefc44 --- /dev/null +++ b/ext/impl/psb_d_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_arwsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_arwsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_arwsum diff --git a/ext/impl/psb_d_dia_colsum.f90 b/ext/impl/psb_d_dia_colsum.f90 new file mode 100644 index 00000000..6a6eb81c --- /dev/null +++ b/ext/impl/psb_d_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_colsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_colsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_colsum diff --git a/ext/impl/psb_d_dia_csgetptn.f90 b/ext/impl/psb_d_dia_csgetptn.f90 new file mode 100644 index 00000000..ad0e040a --- /dev/null +++ b/ext/impl/psb_d_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csgetptn + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_d_dia_csgetptn diff --git a/ext/impl/psb_d_dia_csgetrow.f90 b/ext/impl/psb_d_dia_csgetrow.f90 new file mode 100644 index 00000000..7e05a26e --- /dev/null +++ b/ext/impl/psb_d_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csgetrow + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_d_dia_csgetrow diff --git a/ext/impl/psb_d_dia_csmm.f90 b/ext/impl/psb_d_dia_csmm.f90 new file mode 100644 index 00000000..81ad967d --- /dev/null +++ b/ext/impl/psb_d_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csmm + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_d_dia_csmm_inner + +end subroutine psb_d_dia_csmm diff --git a/ext/impl/psb_d_dia_csmv.f90 b/ext/impl/psb_d_dia_csmv.f90 new file mode 100644 index 00000000..166b4c58 --- /dev/null +++ b/ext/impl/psb_d_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csmv + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_d_dia_csmv_inner + +end subroutine psb_d_dia_csmv diff --git a/ext/impl/psb_d_dia_get_diag.f90 b/ext/impl/psb_d_dia_get_diag.f90 new file mode 100644 index 00000000..bbcb4a12 --- /dev/null +++ b/ext/impl/psb_d_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_get_diag + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = done + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_get_diag diff --git a/ext/impl/psb_d_dia_maxval.f90 b/ext/impl/psb_d_dia_maxval.f90 new file mode 100644 index 00000000..f57be1ff --- /dev/null +++ b/ext/impl/psb_d_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_d_dia_maxval(a) result(res) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_maxval + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_d_dia_maxval diff --git a/ext/impl/psb_d_dia_mold.f90 b/ext/impl/psb_d_dia_mold.f90 new file mode 100644 index 00000000..2b3cef81 --- /dev/null +++ b/ext/impl/psb_d_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_mold(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_mold + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_mold diff --git a/ext/impl/psb_d_dia_print.f90 b/ext/impl/psb_d_dia_print.f90 new file mode 100644 index 00000000..e32dc2ed --- /dev/null +++ b/ext/impl/psb_d_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dia_print' + logical, parameter :: debug=.false. + + class(psb_d_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_d_dia_print diff --git a/ext/impl/psb_d_dia_reallocate_nz.f90 b/ext/impl/psb_d_dia_reallocate_nz.f90 new file mode 100644 index 00000000..83864dd8 --- /dev/null +++ b/ext/impl/psb_d_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_reallocate_nz diff --git a/ext/impl/psb_d_dia_reinit.f90 b/ext/impl/psb_d_dia_reinit.f90 new file mode 100644 index 00000000..f1e91ade --- /dev/null +++ b/ext/impl/psb_d_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_reinit(a,clear) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_reinit + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = dzero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_reinit diff --git a/ext/impl/psb_d_dia_rowsum.f90 b/ext/impl/psb_d_dia_rowsum.f90 new file mode 100644 index 00000000..7a5875ba --- /dev/null +++ b/ext/impl/psb_d_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_rowsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_rowsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_rowsum diff --git a/ext/impl/psb_d_dia_scal.f90 b/ext/impl/psb_d_dia_scal.f90 new file mode 100644 index 00000000..d87c0d25 --- /dev/null +++ b/ext/impl/psb_d_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_scal + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_scal diff --git a/ext/impl/psb_d_dia_scals.f90 b/ext/impl/psb_d_dia_scals.f90 new file mode 100644 index 00000000..a3958f57 --- /dev/null +++ b/ext/impl/psb_d_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_scals(d,a,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_scals + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_scals diff --git a/ext/impl/psb_d_dns_mat_impl.f90 b/ext/impl/psb_d_dns_mat_impl.f90 new file mode 100644 index 00000000..edf5cde4 --- /dev/null +++ b/ext/impl/psb_d_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_d_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_d_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csmv + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='d_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call dgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_csmv + + +!> Function csmm: +!! \memberof psb_d_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_d_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csmm + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='d_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call dgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_d_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_d_dns_csnmi(a) result(res) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csnmi + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_dpk_) :: acc + + res = dzero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_d_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_d_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_d_dns_get_diag(a,d,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_get_diag + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = dzero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_d_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_d_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_d_dns_sparse_mat +!! \brief Allocate a class(psb_d_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_d_dns_mold(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_mold + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_d_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_d_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_d_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = dzero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_d_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_d_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csgetrow + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_d_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_d_dns_trim(a) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_trim + implicit none + class(psb_d_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Copy and convert from psb_d_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_d_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_cp_dns_from_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = dzero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = dzero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Copy and convert to psb_d_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_d_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_cp_dns_to_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= dzero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_d_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Convert to psb_d_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_d_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_mv_dns_to_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_d_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Convert from psb_d_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_d_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_mv_dns_from_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_mv_dns_from_coo + diff --git a/ext/impl/psb_d_ell_aclsum.f90 b/ext/impl/psb_d_ell_aclsum.f90 new file mode 100644 index 00000000..e0bfc18d --- /dev/null +++ b/ext/impl/psb_d_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_aclsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_aclsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_aclsum diff --git a/ext/impl/psb_d_ell_allocate_mnnz.f90 b/ext/impl/psb_d_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..95e4558c --- /dev/null +++ b/ext/impl/psb_d_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_allocate_mnnz diff --git a/ext/impl/psb_d_ell_arwsum.f90 b/ext/impl/psb_d_ell_arwsum.f90 new file mode 100644 index 00000000..6bf3b888 --- /dev/null +++ b/ext/impl/psb_d_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_arwsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_arwsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = done + else + d(i) = dzero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_arwsum diff --git a/ext/impl/psb_d_ell_colsum.f90 b/ext/impl/psb_d_ell_colsum.f90 new file mode 100644 index 00000000..9eb30ca0 --- /dev/null +++ b/ext/impl/psb_d_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_colsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_colsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_colsum diff --git a/ext/impl/psb_d_ell_csgetblk.f90 b/ext/impl/psb_d_ell_csgetblk.f90 new file mode 100644 index 00000000..9725518f --- /dev/null +++ b/ext/impl/psb_d_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csgetblk + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_csgetblk diff --git a/ext/impl/psb_d_ell_csgetptn.f90 b/ext/impl/psb_d_ell_csgetptn.f90 new file mode 100644 index 00000000..a050fe54 --- /dev/null +++ b/ext/impl/psb_d_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csgetptn + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_ell_csgetrow + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_ell_csmm + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ell_csmv + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ell_csnm1 + + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = done + else + vt(:) = dzero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_d_ell_csnm1 diff --git a/ext/impl/psb_d_ell_csnmi.f90 b/ext/impl/psb_d_ell_csnmi.f90 new file mode 100644 index 00000000..b4e3d03e --- /dev/null +++ b/ext/impl/psb_d_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csnmi + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + done + res = max(res,acc) + end do + +end function psb_d_ell_csnmi diff --git a/ext/impl/psb_d_ell_csput.f90 b/ext/impl/psb_d_ell_csput.f90 new file mode 100644 index 00000000..d38d9d51 --- /dev/null +++ b/ext/impl/psb_d_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csput_a + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_d_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_d_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='d_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_d_ell_srch_upd +end subroutine psb_d_ell_csput_a diff --git a/ext/impl/psb_d_ell_cssm.f90 b/ext/impl/psb_d_ell_cssm.f90 new file mode 100644 index 00000000..3c8b5f21 --- /dev/null +++ b/ext/impl/psb_d_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_cssm + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_d_ell_cssv + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_d_ell_get_diag + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = done + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = dzero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_get_diag diff --git a/ext/impl/psb_d_ell_maxval.f90 b/ext/impl/psb_d_ell_maxval.f90 new file mode 100644 index 00000000..d0cb24d3 --- /dev/null +++ b/ext/impl/psb_d_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_ell_maxval(a) result(res) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_maxval + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_d_ell_maxval diff --git a/ext/impl/psb_d_ell_mold.f90 b/ext/impl/psb_d_ell_mold.f90 new file mode 100644 index 00000000..48814f3c --- /dev/null +++ b/ext/impl/psb_d_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_mold(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_mold + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_mold diff --git a/ext/impl/psb_d_ell_print.f90 b/ext/impl/psb_d_ell_print.f90 new file mode 100644 index 00000000..cf539662 --- /dev/null +++ b/ext/impl/psb_d_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_d_ell_print diff --git a/ext/impl/psb_d_ell_reallocate_nz.f90 b/ext/impl/psb_d_ell_reallocate_nz.f90 new file mode 100644 index 00000000..8f92ffad --- /dev/null +++ b/ext/impl/psb_d_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_reallocate_nz diff --git a/ext/impl/psb_d_ell_reinit.f90 b/ext/impl/psb_d_ell_reinit.f90 new file mode 100644 index 00000000..ab9a7ba2 --- /dev/null +++ b/ext/impl/psb_d_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_reinit(a,clear) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_reinit + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = dzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_reinit diff --git a/ext/impl/psb_d_ell_rowsum.f90 b/ext/impl/psb_d_ell_rowsum.f90 new file mode 100644 index 00000000..782775d4 --- /dev/null +++ b/ext/impl/psb_d_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_rowsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_rowsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = done + else + d(i) = dzero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_rowsum diff --git a/ext/impl/psb_d_ell_scal.f90 b/ext/impl/psb_d_ell_scal.f90 new file mode 100644 index 00000000..15be8a66 --- /dev/null +++ b/ext/impl/psb_d_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_scal + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_scal diff --git a/ext/impl/psb_d_ell_scals.f90 b/ext/impl/psb_d_ell_scals.f90 new file mode 100644 index 00000000..501f42b0 --- /dev/null +++ b/ext/impl/psb_d_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_scals(d,a,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_scals + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_scals diff --git a/ext/impl/psb_d_ell_trim.f90 b/ext/impl/psb_d_ell_trim.f90 new file mode 100644 index 00000000..8b1d52f7 --- /dev/null +++ b/ext/impl/psb_d_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_trim(a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_trim + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_trim diff --git a/ext/impl/psb_d_hdia_allocate_mnnz.f90 b/ext/impl/psb_d_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..e5721754 --- /dev/null +++ b/ext/impl/psb_d_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hdia_allocate_mnnz diff --git a/ext/impl/psb_d_hdia_csmv.f90 b/ext/impl/psb_d_hdia_csmv.f90 new file mode 100644 index 00000000..82599342 --- /dev/null +++ b/ext/impl/psb_d_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_csmv + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_d_inner_dia_csmv + +end subroutine psb_d_hdia_csmv diff --git a/ext/impl/psb_d_hdia_mold.f90 b/ext/impl/psb_d_hdia_mold.f90 new file mode 100644 index 00000000..cebedd44 --- /dev/null +++ b/ext/impl/psb_d_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hdia_mold(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_mold + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hdia_mold diff --git a/ext/impl/psb_d_hdia_print.f90 b/ext/impl/psb_d_hdia_print.f90 new file mode 100644 index 00000000..43753299 --- /dev/null +++ b/ext/impl/psb_d_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_d_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + real(psb_dpk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_d_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_d_hdia_print diff --git a/ext/impl/psb_d_hll_aclsum.f90 b/ext/impl/psb_d_hll_aclsum.f90 new file mode 100644 index 00000000..1f868edc --- /dev/null +++ b/ext/impl/psb_d_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_aclsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_aclsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine d_hll_aclsum + +end subroutine psb_d_hll_aclsum diff --git a/ext/impl/psb_d_hll_allocate_mnnz.f90 b/ext/impl/psb_d_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..f58d0e4a --- /dev/null +++ b/ext/impl/psb_d_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_allocate_mnnz diff --git a/ext/impl/psb_d_hll_arwsum.f90 b/ext/impl/psb_d_hll_arwsum.f90 new file mode 100644 index 00000000..e5ae24fb --- /dev/null +++ b/ext/impl/psb_d_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_arwsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_arwsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine d_hll_arwsum + +end subroutine psb_d_hll_arwsum diff --git a/ext/impl/psb_d_hll_colsum.f90 b/ext/impl/psb_d_hll_colsum.f90 new file mode 100644 index 00000000..8c2020ec --- /dev/null +++ b/ext/impl/psb_d_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_colsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_colsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine d_hll_colsum + +end subroutine psb_d_hll_colsum diff --git a/ext/impl/psb_d_hll_csgetblk.f90 b/ext/impl/psb_d_hll_csgetblk.f90 new file mode 100644 index 00000000..185baf29 --- /dev/null +++ b/ext/impl/psb_d_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csgetblk + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_csgetblk diff --git a/ext/impl/psb_d_hll_csgetptn.f90 b/ext/impl/psb_d_hll_csgetptn.f90 new file mode 100644 index 00000000..a7cdc148 --- /dev/null +++ b/ext/impl/psb_d_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csgetptn + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_hll_csgetrow + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_hll_csmm + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + real(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_d_hll_csmv + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_d_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == done) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == done) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == dzero) then + if (beta == dzero) then + do i=1,m + y(ir+i-1) = dzero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == dzero) then + do i=1,m + tmp = dzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = dzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_d_hll_csmv_inner + + subroutine psb_d_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_8 + + subroutine psb_d_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_24 + + subroutine psb_d_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_16 + + subroutine psb_d_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_32 + + subroutine psb_d_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_4 + +end subroutine psb_d_hll_csmv diff --git a/ext/impl/psb_d_hll_csnm1.f90 b/ext/impl/psb_d_hll_csnm1.f90 new file mode 100644 index 00000000..4627a4d2 --- /dev/null +++ b/ext/impl/psb_d_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csnm1 + + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_dpk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = dzero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = done + else + vt = dzero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_d_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_d_hll_csnm1_inner + +end function psb_d_hll_csnm1 diff --git a/ext/impl/psb_d_hll_csnmi.f90 b/ext/impl/psb_d_hll_csnmi.f90 new file mode 100644 index 00000000..2b758fa3 --- /dev/null +++ b/ext/impl/psb_d_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csnmi + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = dzero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_d_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = done + else + tmp = dzero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_d_hll_csnmi_inner + +end function psb_d_hll_csnmi diff --git a/ext/impl/psb_d_hll_csput.f90 b/ext/impl/psb_d_hll_csput.f90 new file mode 100644 index 00000000..064e6c59 --- /dev/null +++ b/ext/impl/psb_d_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csput_a + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_d_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='d_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_d_hll_srch_upd + +end subroutine psb_d_hll_csput_a diff --git a/ext/impl/psb_d_hll_cssm.f90 b/ext/impl/psb_d_hll_cssm.f90 new file mode 100644 index 00000000..f4f6e349 --- /dev/null +++ b/ext/impl/psb_d_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_cssm + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + real(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_d_hll_cssv + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_d_hll_get_diag + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = done + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_d_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = dzero + end if + end do + + end subroutine psb_d_hll_get_diag_inner + +end subroutine psb_d_hll_get_diag diff --git a/ext/impl/psb_d_hll_maxval.f90 b/ext/impl/psb_d_hll_maxval.f90 new file mode 100644 index 00000000..8408cc96 --- /dev/null +++ b/ext/impl/psb_d_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_hll_maxval(a) result(res) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_maxval + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,done) + +end function psb_d_hll_maxval diff --git a/ext/impl/psb_d_hll_mold.f90 b/ext/impl/psb_d_hll_mold.f90 new file mode 100644 index 00000000..e9d721f0 --- /dev/null +++ b/ext/impl/psb_d_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_mold(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_mold + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_mold diff --git a/ext/impl/psb_d_hll_print.f90 b/ext/impl/psb_d_hll_print.f90 new file mode 100644 index 00000000..93c56d5c --- /dev/null +++ b/ext/impl/psb_d_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_d_hll_print diff --git a/ext/impl/psb_d_hll_reallocate_nz.f90 b/ext/impl/psb_d_hll_reallocate_nz.f90 new file mode 100644 index 00000000..7abdd58f --- /dev/null +++ b/ext/impl/psb_d_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_reallocate_nz diff --git a/ext/impl/psb_d_hll_reinit.f90 b/ext/impl/psb_d_hll_reinit.f90 new file mode 100644 index 00000000..6a0f34fa --- /dev/null +++ b/ext/impl/psb_d_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_reinit(a,clear) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_reinit + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = dzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_reinit diff --git a/ext/impl/psb_d_hll_rowsum.f90 b/ext/impl/psb_d_hll_rowsum.f90 new file mode 100644 index 00000000..bfa2d2e1 --- /dev/null +++ b/ext/impl/psb_d_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_rowsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_rowsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = done + else + d = dzero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine d_hll_rowsum + +end subroutine psb_d_hll_rowsum diff --git a/ext/impl/psb_d_hll_scal.f90 b/ext/impl/psb_d_hll_scal.f90 new file mode 100644 index 00000000..ed9dd9ce --- /dev/null +++ b/ext/impl/psb_d_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_scal + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_d_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: d(*) + real(psb_dpk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_d_hll_scal_inner + + +end subroutine psb_d_hll_scal diff --git a/ext/impl/psb_d_hll_scals.f90 b/ext/impl/psb_d_hll_scals.f90 new file mode 100644 index 00000000..8e05cddd --- /dev/null +++ b/ext/impl/psb_d_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_scals(d,a,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_scals + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_scals diff --git a/ext/impl/psb_d_mv_dia_from_coo.f90 b/ext/impl/psb_d_mv_dia_from_coo.f90 new file mode 100644 index 00000000..e38e975a --- /dev/null +++ b/ext/impl/psb_d_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_mv_dia_from_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_dia_from_coo diff --git a/ext/impl/psb_d_mv_dia_to_coo.f90 b/ext/impl/psb_d_mv_dia_to_coo.f90 new file mode 100644 index 00000000..d8ac7a69 --- /dev/null +++ b/ext/impl/psb_d_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_mv_dia_to_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_dia_to_coo diff --git a/ext/impl/psb_d_mv_ell_from_coo.f90 b/ext/impl/psb_d_mv_ell_from_coo.f90 new file mode 100644 index 00000000..8f98daab --- /dev/null +++ b/ext/impl/psb_d_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_from_coo + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_mv_ell_from_coo diff --git a/ext/impl/psb_d_mv_ell_from_fmt.f90 b/ext/impl/psb_d_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..6589fd0a --- /dev/null +++ b/ext/impl/psb_d_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_from_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_d_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_ell_from_fmt diff --git a/ext/impl/psb_d_mv_ell_to_coo.f90 b/ext/impl/psb_d_mv_ell_to_coo.f90 new file mode 100644 index 00000000..a1220a6e --- /dev/null +++ b/ext/impl/psb_d_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_to_coo + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_ell_to_coo diff --git a/ext/impl/psb_d_mv_ell_to_fmt.f90 b/ext/impl/psb_d_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..a5975360 --- /dev/null +++ b/ext/impl/psb_d_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_to_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_d_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_ell_to_fmt diff --git a/ext/impl/psb_d_mv_hdia_from_coo.f90 b/ext/impl/psb_d_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..68caea34 --- /dev/null +++ b/ext/impl/psb_d_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_mv_hdia_from_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hdia_from_coo diff --git a/ext/impl/psb_d_mv_hdia_to_coo.f90 b/ext/impl/psb_d_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..595e20a2 --- /dev/null +++ b/ext/impl/psb_d_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_mv_hdia_to_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_hdia_to_coo diff --git a/ext/impl/psb_d_mv_hll_from_coo.f90 b/ext/impl/psb_d_mv_hll_from_coo.f90 new file mode 100644 index 00000000..78faed4b --- /dev/null +++ b/ext/impl/psb_d_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_from_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hll_from_coo diff --git a/ext/impl/psb_d_mv_hll_from_fmt.f90 b/ext/impl/psb_d_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..76a2f2fb --- /dev/null +++ b/ext/impl/psb_d_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_from_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_d_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hll_from_fmt diff --git a/ext/impl/psb_d_mv_hll_to_coo.f90 b/ext/impl/psb_d_mv_hll_to_coo.f90 new file mode 100644 index 00000000..fbc9111b --- /dev/null +++ b/ext/impl/psb_d_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_to_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_hll_to_coo diff --git a/ext/impl/psb_d_mv_hll_to_fmt.f90 b/ext/impl/psb_d_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..8022b2e5 --- /dev/null +++ b/ext/impl/psb_d_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_to_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_d_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hll_to_fmt diff --git a/ext/impl/psb_s_cp_dia_from_coo.f90 b/ext/impl/psb_s_cp_dia_from_coo.f90 new file mode 100644 index 00000000..6d9a0749 --- /dev/null +++ b/ext/impl/psb_s_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_cp_dia_from_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_dia_from_coo diff --git a/ext/impl/psb_s_cp_dia_to_coo.f90 b/ext/impl/psb_s_cp_dia_to_coo.f90 new file mode 100644 index 00000000..c0cd5d32 --- /dev/null +++ b/ext/impl/psb_s_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_cp_dia_to_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + call psi_s_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_s_cp_dia_to_coo diff --git a/ext/impl/psb_s_cp_ell_from_coo.f90 b/ext/impl/psb_s_cp_ell_from_coo.f90 new file mode 100644 index 00000000..f178a05c --- /dev/null +++ b/ext/impl/psb_s_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_s_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_s_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_s_cp_ell_from_coo diff --git a/ext/impl/psb_s_cp_ell_from_fmt.f90 b/ext/impl/psb_s_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..bffe3d85 --- /dev/null +++ b/ext/impl/psb_s_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_from_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_s_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_s_cp_ell_from_fmt diff --git a/ext/impl/psb_s_cp_ell_to_coo.f90 b/ext/impl/psb_s_cp_ell_to_coo.f90 new file mode 100644 index 00000000..b8acddfc --- /dev/null +++ b/ext/impl/psb_s_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_to_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_s_cp_ell_to_coo diff --git a/ext/impl/psb_s_cp_ell_to_fmt.f90 b/ext/impl/psb_s_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..58fe3756 --- /dev/null +++ b/ext/impl/psb_s_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_to_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_s_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cp_ell_to_fmt diff --git a/ext/impl/psb_s_cp_hdia_from_coo.f90 b/ext/impl/psb_s_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..b3d427d9 --- /dev/null +++ b/ext/impl/psb_s_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_cp_hdia_from_coo + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_s_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_s_cp_hdia_from_coo diff --git a/ext/impl/psb_s_cp_hdia_to_coo.f90 b/ext/impl/psb_s_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..8e90e236 --- /dev/null +++ b/ext/impl/psb_s_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_s_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_s_cp_hdia_to_coo diff --git a/ext/impl/psb_s_cp_hll_from_coo.f90 b/ext/impl/psb_s_cp_hll_from_coo.f90 new file mode 100644 index 00000000..9d75f994 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_from_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_hll_from_coo diff --git a/ext/impl/psb_s_cp_hll_from_fmt.f90 b/ext/impl/psb_s_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..8f010902 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_from_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_s_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_s_cp_hll_from_fmt diff --git a/ext/impl/psb_s_cp_hll_to_coo.f90 b/ext/impl/psb_s_cp_hll_to_coo.f90 new file mode 100644 index 00000000..74502ba2 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_to_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + real(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_s_cp_hll_to_coo diff --git a/ext/impl/psb_s_cp_hll_to_fmt.f90 b/ext/impl/psb_s_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..f7adaa54 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_to_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_s_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cp_hll_to_fmt diff --git a/ext/impl/psb_s_dia_aclsum.f90 b/ext/impl/psb_s_dia_aclsum.f90 new file mode 100644 index 00000000..718a2424 --- /dev/null +++ b/ext/impl/psb_s_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_aclsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_aclsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_aclsum diff --git a/ext/impl/psb_s_dia_allocate_mnnz.f90 b/ext/impl/psb_s_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..df56c4a6 --- /dev/null +++ b/ext/impl/psb_s_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_allocate_mnnz diff --git a/ext/impl/psb_s_dia_arwsum.f90 b/ext/impl/psb_s_dia_arwsum.f90 new file mode 100644 index 00000000..5a974bbf --- /dev/null +++ b/ext/impl/psb_s_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_arwsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_arwsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_arwsum diff --git a/ext/impl/psb_s_dia_colsum.f90 b/ext/impl/psb_s_dia_colsum.f90 new file mode 100644 index 00000000..e60eb88f --- /dev/null +++ b/ext/impl/psb_s_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_colsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_colsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_colsum diff --git a/ext/impl/psb_s_dia_csgetptn.f90 b/ext/impl/psb_s_dia_csgetptn.f90 new file mode 100644 index 00000000..f946eb73 --- /dev/null +++ b/ext/impl/psb_s_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csgetptn + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_s_dia_csgetptn diff --git a/ext/impl/psb_s_dia_csgetrow.f90 b/ext/impl/psb_s_dia_csgetrow.f90 new file mode 100644 index 00000000..b79e2470 --- /dev/null +++ b/ext/impl/psb_s_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csgetrow + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_s_dia_csgetrow diff --git a/ext/impl/psb_s_dia_csmm.f90 b/ext/impl/psb_s_dia_csmm.f90 new file mode 100644 index 00000000..9f586dbb --- /dev/null +++ b/ext/impl/psb_s_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csmm + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_s_dia_csmm_inner + +end subroutine psb_s_dia_csmm diff --git a/ext/impl/psb_s_dia_csmv.f90 b/ext/impl/psb_s_dia_csmv.f90 new file mode 100644 index 00000000..1a23932e --- /dev/null +++ b/ext/impl/psb_s_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csmv + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_s_dia_csmv_inner + +end subroutine psb_s_dia_csmv diff --git a/ext/impl/psb_s_dia_get_diag.f90 b/ext/impl/psb_s_dia_get_diag.f90 new file mode 100644 index 00000000..5909c72a --- /dev/null +++ b/ext/impl/psb_s_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_get_diag + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = sone + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_get_diag diff --git a/ext/impl/psb_s_dia_maxval.f90 b/ext/impl/psb_s_dia_maxval.f90 new file mode 100644 index 00000000..5f672644 --- /dev/null +++ b/ext/impl/psb_s_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_s_dia_maxval(a) result(res) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_maxval + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_s_dia_maxval diff --git a/ext/impl/psb_s_dia_mold.f90 b/ext/impl/psb_s_dia_mold.f90 new file mode 100644 index 00000000..a65379a4 --- /dev/null +++ b/ext/impl/psb_s_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_mold(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_mold + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_mold diff --git a/ext/impl/psb_s_dia_print.f90 b/ext/impl/psb_s_dia_print.f90 new file mode 100644 index 00000000..a0de1ba8 --- /dev/null +++ b/ext/impl/psb_s_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dia_print' + logical, parameter :: debug=.false. + + class(psb_s_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_s_dia_print diff --git a/ext/impl/psb_s_dia_reallocate_nz.f90 b/ext/impl/psb_s_dia_reallocate_nz.f90 new file mode 100644 index 00000000..d37d9e5f --- /dev/null +++ b/ext/impl/psb_s_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_reallocate_nz diff --git a/ext/impl/psb_s_dia_reinit.f90 b/ext/impl/psb_s_dia_reinit.f90 new file mode 100644 index 00000000..dd109783 --- /dev/null +++ b/ext/impl/psb_s_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_reinit(a,clear) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_reinit + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = szero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_reinit diff --git a/ext/impl/psb_s_dia_rowsum.f90 b/ext/impl/psb_s_dia_rowsum.f90 new file mode 100644 index 00000000..3f32a2b2 --- /dev/null +++ b/ext/impl/psb_s_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_rowsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_rowsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_rowsum diff --git a/ext/impl/psb_s_dia_scal.f90 b/ext/impl/psb_s_dia_scal.f90 new file mode 100644 index 00000000..7ccf881a --- /dev/null +++ b/ext/impl/psb_s_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_scal + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_scal diff --git a/ext/impl/psb_s_dia_scals.f90 b/ext/impl/psb_s_dia_scals.f90 new file mode 100644 index 00000000..da1bc94e --- /dev/null +++ b/ext/impl/psb_s_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_scals(d,a,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_scals + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_scals diff --git a/ext/impl/psb_s_dns_mat_impl.f90 b/ext/impl/psb_s_dns_mat_impl.f90 new file mode 100644 index 00000000..f6f458c8 --- /dev/null +++ b/ext/impl/psb_s_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_s_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_s_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csmv + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='s_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call sgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_csmv + + +!> Function csmm: +!! \memberof psb_s_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_s_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csmm + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='s_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call sgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_s_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_s_dns_csnmi(a) result(res) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csnmi + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_spk_) :: acc + + res = szero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_s_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_s_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_s_dns_get_diag(a,d,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_get_diag + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = szero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_s_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_s_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_s_dns_sparse_mat +!! \brief Allocate a class(psb_s_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_s_dns_mold(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_mold + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_s_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_s_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_s_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = szero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_s_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_s_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csgetrow + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_s_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_s_dns_trim(a) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_trim + implicit none + class(psb_s_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Copy and convert from psb_s_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_s_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_cp_dns_from_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = szero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = szero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Copy and convert to psb_s_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_s_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_cp_dns_to_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= szero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_s_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Convert to psb_s_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_s_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_mv_dns_to_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_s_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Convert from psb_s_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_s_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_mv_dns_from_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_mv_dns_from_coo + diff --git a/ext/impl/psb_s_ell_aclsum.f90 b/ext/impl/psb_s_ell_aclsum.f90 new file mode 100644 index 00000000..2eea0cc9 --- /dev/null +++ b/ext/impl/psb_s_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_aclsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_aclsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_aclsum diff --git a/ext/impl/psb_s_ell_allocate_mnnz.f90 b/ext/impl/psb_s_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..fd9f1b49 --- /dev/null +++ b/ext/impl/psb_s_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_allocate_mnnz diff --git a/ext/impl/psb_s_ell_arwsum.f90 b/ext/impl/psb_s_ell_arwsum.f90 new file mode 100644 index 00000000..a47f8721 --- /dev/null +++ b/ext/impl/psb_s_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_arwsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_arwsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = sone + else + d(i) = szero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_arwsum diff --git a/ext/impl/psb_s_ell_colsum.f90 b/ext/impl/psb_s_ell_colsum.f90 new file mode 100644 index 00000000..0924d8a9 --- /dev/null +++ b/ext/impl/psb_s_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_colsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_colsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_colsum diff --git a/ext/impl/psb_s_ell_csgetblk.f90 b/ext/impl/psb_s_ell_csgetblk.f90 new file mode 100644 index 00000000..5468e93c --- /dev/null +++ b/ext/impl/psb_s_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csgetblk + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_csgetblk diff --git a/ext/impl/psb_s_ell_csgetptn.f90 b/ext/impl/psb_s_ell_csgetptn.f90 new file mode 100644 index 00000000..07463757 --- /dev/null +++ b/ext/impl/psb_s_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csgetptn + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_ell_csgetrow + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_ell_csmm + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ell_csmv + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ell_csnm1 + + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_spk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = sone + else + vt(:) = szero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_s_ell_csnm1 diff --git a/ext/impl/psb_s_ell_csnmi.f90 b/ext/impl/psb_s_ell_csnmi.f90 new file mode 100644 index 00000000..1df9bafa --- /dev/null +++ b/ext/impl/psb_s_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csnmi + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + sone + res = max(res,acc) + end do + +end function psb_s_ell_csnmi diff --git a/ext/impl/psb_s_ell_csput.f90 b/ext/impl/psb_s_ell_csput.f90 new file mode 100644 index 00000000..c0d69067 --- /dev/null +++ b/ext/impl/psb_s_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csput_a + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_s_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_s_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='s_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_s_ell_srch_upd +end subroutine psb_s_ell_csput_a diff --git a/ext/impl/psb_s_ell_cssm.f90 b/ext/impl/psb_s_ell_cssm.f90 new file mode 100644 index 00000000..ca50fa35 --- /dev/null +++ b/ext/impl/psb_s_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_cssm + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_s_ell_cssv + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + real(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_s_ell_get_diag + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = sone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = szero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_get_diag diff --git a/ext/impl/psb_s_ell_maxval.f90 b/ext/impl/psb_s_ell_maxval.f90 new file mode 100644 index 00000000..6e2635b8 --- /dev/null +++ b/ext/impl/psb_s_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_ell_maxval(a) result(res) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_maxval + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_s_ell_maxval diff --git a/ext/impl/psb_s_ell_mold.f90 b/ext/impl/psb_s_ell_mold.f90 new file mode 100644 index 00000000..4d137112 --- /dev/null +++ b/ext/impl/psb_s_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_mold(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_mold + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_mold diff --git a/ext/impl/psb_s_ell_print.f90 b/ext/impl/psb_s_ell_print.f90 new file mode 100644 index 00000000..aec15a6d --- /dev/null +++ b/ext/impl/psb_s_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_s_ell_print diff --git a/ext/impl/psb_s_ell_reallocate_nz.f90 b/ext/impl/psb_s_ell_reallocate_nz.f90 new file mode 100644 index 00000000..ff7dabda --- /dev/null +++ b/ext/impl/psb_s_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_reallocate_nz diff --git a/ext/impl/psb_s_ell_reinit.f90 b/ext/impl/psb_s_ell_reinit.f90 new file mode 100644 index 00000000..088e8398 --- /dev/null +++ b/ext/impl/psb_s_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_reinit(a,clear) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_reinit + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = szero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_reinit diff --git a/ext/impl/psb_s_ell_rowsum.f90 b/ext/impl/psb_s_ell_rowsum.f90 new file mode 100644 index 00000000..092329c9 --- /dev/null +++ b/ext/impl/psb_s_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_rowsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_rowsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = sone + else + d(i) = szero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_rowsum diff --git a/ext/impl/psb_s_ell_scal.f90 b/ext/impl/psb_s_ell_scal.f90 new file mode 100644 index 00000000..7f39f63d --- /dev/null +++ b/ext/impl/psb_s_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_scal + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_scal diff --git a/ext/impl/psb_s_ell_scals.f90 b/ext/impl/psb_s_ell_scals.f90 new file mode 100644 index 00000000..4bc77626 --- /dev/null +++ b/ext/impl/psb_s_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_scals(d,a,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_scals + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_scals diff --git a/ext/impl/psb_s_ell_trim.f90 b/ext/impl/psb_s_ell_trim.f90 new file mode 100644 index 00000000..758a8bb5 --- /dev/null +++ b/ext/impl/psb_s_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_trim(a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_trim + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_trim diff --git a/ext/impl/psb_s_hdia_allocate_mnnz.f90 b/ext/impl/psb_s_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..2c4e16fc --- /dev/null +++ b/ext/impl/psb_s_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hdia_allocate_mnnz diff --git a/ext/impl/psb_s_hdia_csmv.f90 b/ext/impl/psb_s_hdia_csmv.f90 new file mode 100644 index 00000000..d945f964 --- /dev/null +++ b/ext/impl/psb_s_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_csmv + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_s_inner_dia_csmv + +end subroutine psb_s_hdia_csmv diff --git a/ext/impl/psb_s_hdia_mold.f90 b/ext/impl/psb_s_hdia_mold.f90 new file mode 100644 index 00000000..a62630c0 --- /dev/null +++ b/ext/impl/psb_s_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hdia_mold(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_mold + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hdia_mold diff --git a/ext/impl/psb_s_hdia_print.f90 b/ext/impl/psb_s_hdia_print.f90 new file mode 100644 index 00000000..f4b927bc --- /dev/null +++ b/ext/impl/psb_s_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_s_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + real(psb_spk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_s_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_s_hdia_print diff --git a/ext/impl/psb_s_hll_aclsum.f90 b/ext/impl/psb_s_hll_aclsum.f90 new file mode 100644 index 00000000..cf75dfb2 --- /dev/null +++ b/ext/impl/psb_s_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_aclsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_aclsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine s_hll_aclsum + +end subroutine psb_s_hll_aclsum diff --git a/ext/impl/psb_s_hll_allocate_mnnz.f90 b/ext/impl/psb_s_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..549eccb4 --- /dev/null +++ b/ext/impl/psb_s_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_allocate_mnnz diff --git a/ext/impl/psb_s_hll_arwsum.f90 b/ext/impl/psb_s_hll_arwsum.f90 new file mode 100644 index 00000000..b93efb12 --- /dev/null +++ b/ext/impl/psb_s_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_arwsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_arwsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine s_hll_arwsum + +end subroutine psb_s_hll_arwsum diff --git a/ext/impl/psb_s_hll_colsum.f90 b/ext/impl/psb_s_hll_colsum.f90 new file mode 100644 index 00000000..02cceac2 --- /dev/null +++ b/ext/impl/psb_s_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_colsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_colsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine s_hll_colsum + +end subroutine psb_s_hll_colsum diff --git a/ext/impl/psb_s_hll_csgetblk.f90 b/ext/impl/psb_s_hll_csgetblk.f90 new file mode 100644 index 00000000..c925e3a2 --- /dev/null +++ b/ext/impl/psb_s_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csgetblk + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_csgetblk diff --git a/ext/impl/psb_s_hll_csgetptn.f90 b/ext/impl/psb_s_hll_csgetptn.f90 new file mode 100644 index 00000000..ccb1b6a1 --- /dev/null +++ b/ext/impl/psb_s_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csgetptn + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_hll_csgetrow + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_hll_csmm + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + real(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_s_hll_csmv + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_s_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == sone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == sone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == szero) then + if (beta == szero) then + do i=1,m + y(ir+i-1) = szero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == szero) then + do i=1,m + tmp = szero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = szero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_s_hll_csmv_inner + + subroutine psb_s_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == szero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_8 + + subroutine psb_s_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == szero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_24 + + subroutine psb_s_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == szero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_16 + + subroutine psb_s_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == szero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_32 + + subroutine psb_s_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == szero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_4 + +end subroutine psb_s_hll_csmv diff --git a/ext/impl/psb_s_hll_csnm1.f90 b/ext/impl/psb_s_hll_csnm1.f90 new file mode 100644 index 00000000..6e745081 --- /dev/null +++ b/ext/impl/psb_s_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csnm1 + + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_spk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = szero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = sone + else + vt = szero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_s_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_s_hll_csnm1_inner + +end function psb_s_hll_csnm1 diff --git a/ext/impl/psb_s_hll_csnmi.f90 b/ext/impl/psb_s_hll_csnmi.f90 new file mode 100644 index 00000000..3be15f9b --- /dev/null +++ b/ext/impl/psb_s_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csnmi + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='s_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = szero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_s_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = sone + else + tmp = szero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_s_hll_csnmi_inner + +end function psb_s_hll_csnmi diff --git a/ext/impl/psb_s_hll_csput.f90 b/ext/impl/psb_s_hll_csput.f90 new file mode 100644 index 00000000..b12678d4 --- /dev/null +++ b/ext/impl/psb_s_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csput_a + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_s_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='s_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_s_hll_srch_upd + +end subroutine psb_s_hll_csput_a diff --git a/ext/impl/psb_s_hll_cssm.f90 b/ext/impl/psb_s_hll_cssm.f90 new file mode 100644 index 00000000..30c77c8f --- /dev/null +++ b/ext/impl/psb_s_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_cssm + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + real(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_s_hll_cssv + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + real(psb_spk_) :: acc + real(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_s_hll_get_diag + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = sone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_s_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = szero + end if + end do + + end subroutine psb_s_hll_get_diag_inner + +end subroutine psb_s_hll_get_diag diff --git a/ext/impl/psb_s_hll_maxval.f90 b/ext/impl/psb_s_hll_maxval.f90 new file mode 100644 index 00000000..84625328 --- /dev/null +++ b/ext/impl/psb_s_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_hll_maxval(a) result(res) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_maxval + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,sone) + +end function psb_s_hll_maxval diff --git a/ext/impl/psb_s_hll_mold.f90 b/ext/impl/psb_s_hll_mold.f90 new file mode 100644 index 00000000..eb04ccd9 --- /dev/null +++ b/ext/impl/psb_s_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_mold(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_mold + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_mold diff --git a/ext/impl/psb_s_hll_print.f90 b/ext/impl/psb_s_hll_print.f90 new file mode 100644 index 00000000..fb6bb38d --- /dev/null +++ b/ext/impl/psb_s_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_s_hll_print diff --git a/ext/impl/psb_s_hll_reallocate_nz.f90 b/ext/impl/psb_s_hll_reallocate_nz.f90 new file mode 100644 index 00000000..f7a3076f --- /dev/null +++ b/ext/impl/psb_s_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_reallocate_nz diff --git a/ext/impl/psb_s_hll_reinit.f90 b/ext/impl/psb_s_hll_reinit.f90 new file mode 100644 index 00000000..170abe08 --- /dev/null +++ b/ext/impl/psb_s_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_reinit(a,clear) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_reinit + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = szero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_reinit diff --git a/ext/impl/psb_s_hll_rowsum.f90 b/ext/impl/psb_s_hll_rowsum.f90 new file mode 100644 index 00000000..c7484698 --- /dev/null +++ b/ext/impl/psb_s_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_rowsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_rowsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = sone + else + d = szero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine s_hll_rowsum + +end subroutine psb_s_hll_rowsum diff --git a/ext/impl/psb_s_hll_scal.f90 b/ext/impl/psb_s_hll_scal.f90 new file mode 100644 index 00000000..c8f3ddd5 --- /dev/null +++ b/ext/impl/psb_s_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_scal + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_s_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: d(*) + real(psb_spk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_s_hll_scal_inner + + +end subroutine psb_s_hll_scal diff --git a/ext/impl/psb_s_hll_scals.f90 b/ext/impl/psb_s_hll_scals.f90 new file mode 100644 index 00000000..8f823a20 --- /dev/null +++ b/ext/impl/psb_s_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_scals(d,a,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_scals + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_scals diff --git a/ext/impl/psb_s_mv_dia_from_coo.f90 b/ext/impl/psb_s_mv_dia_from_coo.f90 new file mode 100644 index 00000000..d7dcfc1b --- /dev/null +++ b/ext/impl/psb_s_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_mv_dia_from_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_dia_from_coo diff --git a/ext/impl/psb_s_mv_dia_to_coo.f90 b/ext/impl/psb_s_mv_dia_to_coo.f90 new file mode 100644 index 00000000..c0944b21 --- /dev/null +++ b/ext/impl/psb_s_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_mv_dia_to_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_dia_to_coo diff --git a/ext/impl/psb_s_mv_ell_from_coo.f90 b/ext/impl/psb_s_mv_ell_from_coo.f90 new file mode 100644 index 00000000..90965e41 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_from_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_mv_ell_from_coo diff --git a/ext/impl/psb_s_mv_ell_from_fmt.f90 b/ext/impl/psb_s_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..03ebf8e4 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_from_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_s_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_ell_from_fmt diff --git a/ext/impl/psb_s_mv_ell_to_coo.f90 b/ext/impl/psb_s_mv_ell_to_coo.f90 new file mode 100644 index 00000000..151cbeff --- /dev/null +++ b/ext/impl/psb_s_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_to_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_ell_to_coo diff --git a/ext/impl/psb_s_mv_ell_to_fmt.f90 b/ext/impl/psb_s_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..66f33508 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_to_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_s_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_ell_to_fmt diff --git a/ext/impl/psb_s_mv_hdia_from_coo.f90 b/ext/impl/psb_s_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..88765079 --- /dev/null +++ b/ext/impl/psb_s_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_mv_hdia_from_coo + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hdia_from_coo diff --git a/ext/impl/psb_s_mv_hdia_to_coo.f90 b/ext/impl/psb_s_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..56399b2a --- /dev/null +++ b/ext/impl/psb_s_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_mv_hdia_to_coo + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_hdia_to_coo diff --git a/ext/impl/psb_s_mv_hll_from_coo.f90 b/ext/impl/psb_s_mv_hll_from_coo.f90 new file mode 100644 index 00000000..c8e46086 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_from_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hll_from_coo diff --git a/ext/impl/psb_s_mv_hll_from_fmt.f90 b/ext/impl/psb_s_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..19bda0a6 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_from_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_s_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_hll_from_fmt diff --git a/ext/impl/psb_s_mv_hll_to_coo.f90 b/ext/impl/psb_s_mv_hll_to_coo.f90 new file mode 100644 index 00000000..d36286a5 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_to_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_hll_to_coo diff --git a/ext/impl/psb_s_mv_hll_to_fmt.f90 b/ext/impl/psb_s_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..17618f69 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_to_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_s_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_hll_to_fmt diff --git a/ext/impl/psb_z_cp_dia_from_coo.f90 b/ext/impl/psb_z_cp_dia_from_coo.f90 new file mode 100644 index 00000000..e87bfb34 --- /dev/null +++ b/ext/impl/psb_z_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_cp_dia_from_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_dia_from_coo diff --git a/ext/impl/psb_z_cp_dia_to_coo.f90 b/ext/impl/psb_z_cp_dia_to_coo.f90 new file mode 100644 index 00000000..26fac30b --- /dev/null +++ b/ext/impl/psb_z_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_cp_dia_to_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + call psi_z_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_z_cp_dia_to_coo diff --git a/ext/impl/psb_z_cp_ell_from_coo.f90 b/ext/impl/psb_z_cp_ell_from_coo.f90 new file mode 100644 index 00000000..7559621d --- /dev/null +++ b/ext/impl/psb_z_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_z_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_z_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_z_cp_ell_from_coo diff --git a/ext/impl/psb_z_cp_ell_from_fmt.f90 b/ext/impl/psb_z_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..6d63b64e --- /dev/null +++ b/ext/impl/psb_z_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_from_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_z_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_z_cp_ell_from_fmt diff --git a/ext/impl/psb_z_cp_ell_to_coo.f90 b/ext/impl/psb_z_cp_ell_to_coo.f90 new file mode 100644 index 00000000..38a1696b --- /dev/null +++ b/ext/impl/psb_z_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_to_coo + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_z_cp_ell_to_coo diff --git a/ext/impl/psb_z_cp_ell_to_fmt.f90 b/ext/impl/psb_z_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..7fb64a90 --- /dev/null +++ b/ext/impl/psb_z_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_to_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_z_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cp_ell_to_fmt diff --git a/ext/impl/psb_z_cp_hdia_from_coo.f90 b/ext/impl/psb_z_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..ed77914e --- /dev/null +++ b/ext/impl/psb_z_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_cp_hdia_from_coo + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_z_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_z_cp_hdia_from_coo diff --git a/ext/impl/psb_z_cp_hdia_to_coo.f90 b/ext/impl/psb_z_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..c0544ff0 --- /dev/null +++ b/ext/impl/psb_z_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_z_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_z_cp_hdia_to_coo diff --git a/ext/impl/psb_z_cp_hll_from_coo.f90 b/ext/impl/psb_z_cp_hll_from_coo.f90 new file mode 100644 index 00000000..15a8d1c2 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_from_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_hll_from_coo diff --git a/ext/impl/psb_z_cp_hll_from_fmt.f90 b/ext/impl/psb_z_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..3bdb2271 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_from_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_z_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_z_cp_hll_from_fmt diff --git a/ext/impl/psb_z_cp_hll_to_coo.f90 b/ext/impl/psb_z_cp_hll_to_coo.f90 new file mode 100644 index 00000000..409fe7b5 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_to_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + complex(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_z_cp_hll_to_coo diff --git a/ext/impl/psb_z_cp_hll_to_fmt.f90 b/ext/impl/psb_z_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..b0417c92 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_to_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_z_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cp_hll_to_fmt diff --git a/ext/impl/psb_z_dia_aclsum.f90 b/ext/impl/psb_z_dia_aclsum.f90 new file mode 100644 index 00000000..5aed7ff0 --- /dev/null +++ b/ext/impl/psb_z_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_aclsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_aclsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_aclsum diff --git a/ext/impl/psb_z_dia_allocate_mnnz.f90 b/ext/impl/psb_z_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..e9c614f6 --- /dev/null +++ b/ext/impl/psb_z_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_allocate_mnnz diff --git a/ext/impl/psb_z_dia_arwsum.f90 b/ext/impl/psb_z_dia_arwsum.f90 new file mode 100644 index 00000000..42805349 --- /dev/null +++ b/ext/impl/psb_z_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_arwsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_arwsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_arwsum diff --git a/ext/impl/psb_z_dia_colsum.f90 b/ext/impl/psb_z_dia_colsum.f90 new file mode 100644 index 00000000..69919736 --- /dev/null +++ b/ext/impl/psb_z_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_colsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_colsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_colsum diff --git a/ext/impl/psb_z_dia_csgetptn.f90 b/ext/impl/psb_z_dia_csgetptn.f90 new file mode 100644 index 00000000..d63304f8 --- /dev/null +++ b/ext/impl/psb_z_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csgetptn + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_z_dia_csgetptn diff --git a/ext/impl/psb_z_dia_csgetrow.f90 b/ext/impl/psb_z_dia_csgetrow.f90 new file mode 100644 index 00000000..6571264e --- /dev/null +++ b/ext/impl/psb_z_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csgetrow + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_z_dia_csgetrow diff --git a/ext/impl/psb_z_dia_csmm.f90 b/ext/impl/psb_z_dia_csmm.f90 new file mode 100644 index 00000000..cbebd10e --- /dev/null +++ b/ext/impl/psb_z_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csmm + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_z_dia_csmm_inner + +end subroutine psb_z_dia_csmm diff --git a/ext/impl/psb_z_dia_csmv.f90 b/ext/impl/psb_z_dia_csmv.f90 new file mode 100644 index 00000000..9d1f5a2a --- /dev/null +++ b/ext/impl/psb_z_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csmv + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_z_dia_csmv_inner + +end subroutine psb_z_dia_csmv diff --git a/ext/impl/psb_z_dia_get_diag.f90 b/ext/impl/psb_z_dia_get_diag.f90 new file mode 100644 index 00000000..9b403923 --- /dev/null +++ b/ext/impl/psb_z_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_get_diag + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = zone + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_get_diag diff --git a/ext/impl/psb_z_dia_maxval.f90 b/ext/impl/psb_z_dia_maxval.f90 new file mode 100644 index 00000000..d3518c17 --- /dev/null +++ b/ext/impl/psb_z_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_z_dia_maxval(a) result(res) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_maxval + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_z_dia_maxval diff --git a/ext/impl/psb_z_dia_mold.f90 b/ext/impl/psb_z_dia_mold.f90 new file mode 100644 index 00000000..421af284 --- /dev/null +++ b/ext/impl/psb_z_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_mold(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_mold + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_mold diff --git a/ext/impl/psb_z_dia_print.f90 b/ext/impl/psb_z_dia_print.f90 new file mode 100644 index 00000000..1f7853ef --- /dev/null +++ b/ext/impl/psb_z_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dia_print' + logical, parameter :: debug=.false. + + class(psb_z_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_z_dia_print diff --git a/ext/impl/psb_z_dia_reallocate_nz.f90 b/ext/impl/psb_z_dia_reallocate_nz.f90 new file mode 100644 index 00000000..2d204a64 --- /dev/null +++ b/ext/impl/psb_z_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_reallocate_nz diff --git a/ext/impl/psb_z_dia_reinit.f90 b/ext/impl/psb_z_dia_reinit.f90 new file mode 100644 index 00000000..0f58a9ed --- /dev/null +++ b/ext/impl/psb_z_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_reinit(a,clear) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_reinit + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = zzero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_reinit diff --git a/ext/impl/psb_z_dia_rowsum.f90 b/ext/impl/psb_z_dia_rowsum.f90 new file mode 100644 index 00000000..6918ada1 --- /dev/null +++ b/ext/impl/psb_z_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_rowsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_rowsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_rowsum diff --git a/ext/impl/psb_z_dia_scal.f90 b/ext/impl/psb_z_dia_scal.f90 new file mode 100644 index 00000000..65957e60 --- /dev/null +++ b/ext/impl/psb_z_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_scal + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_scal diff --git a/ext/impl/psb_z_dia_scals.f90 b/ext/impl/psb_z_dia_scals.f90 new file mode 100644 index 00000000..895763d9 --- /dev/null +++ b/ext/impl/psb_z_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_scals(d,a,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_scals + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_scals diff --git a/ext/impl/psb_z_dns_mat_impl.f90 b/ext/impl/psb_z_dns_mat_impl.f90 new file mode 100644 index 00000000..b249a3f2 --- /dev/null +++ b/ext/impl/psb_z_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_z_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_z_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csmv + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='z_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call zgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_csmv + + +!> Function csmm: +!! \memberof psb_z_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_z_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csmm + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='z_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call zgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_z_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_z_dns_csnmi(a) result(res) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csnmi + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_dpk_) :: acc + + res = dzero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_z_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_z_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_z_dns_get_diag(a,d,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_get_diag + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = zzero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_z_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_z_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_z_dns_sparse_mat +!! \brief Allocate a class(psb_z_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_z_dns_mold(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_mold + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_z_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_z_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_z_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = zzero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_z_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_z_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csgetrow + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_z_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_z_dns_trim(a) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_trim + implicit none + class(psb_z_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Copy and convert from psb_z_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_z_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_cp_dns_from_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = zzero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = zzero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Copy and convert to psb_z_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_z_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_cp_dns_to_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= zzero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_z_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Convert to psb_z_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_z_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_mv_dns_to_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_z_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Convert from psb_z_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_z_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_mv_dns_from_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_mv_dns_from_coo + diff --git a/ext/impl/psb_z_ell_aclsum.f90 b/ext/impl/psb_z_ell_aclsum.f90 new file mode 100644 index 00000000..b03121fd --- /dev/null +++ b/ext/impl/psb_z_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_aclsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_aclsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_aclsum diff --git a/ext/impl/psb_z_ell_allocate_mnnz.f90 b/ext/impl/psb_z_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..f7f7f67e --- /dev/null +++ b/ext/impl/psb_z_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_allocate_mnnz diff --git a/ext/impl/psb_z_ell_arwsum.f90 b/ext/impl/psb_z_ell_arwsum.f90 new file mode 100644 index 00000000..9d4b4949 --- /dev/null +++ b/ext/impl/psb_z_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_arwsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_arwsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = done + else + d(i) = dzero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_arwsum diff --git a/ext/impl/psb_z_ell_colsum.f90 b/ext/impl/psb_z_ell_colsum.f90 new file mode 100644 index 00000000..e9c2bc0b --- /dev/null +++ b/ext/impl/psb_z_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_colsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_colsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_colsum diff --git a/ext/impl/psb_z_ell_csgetblk.f90 b/ext/impl/psb_z_ell_csgetblk.f90 new file mode 100644 index 00000000..d2e56e1d --- /dev/null +++ b/ext/impl/psb_z_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csgetblk + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_csgetblk diff --git a/ext/impl/psb_z_ell_csgetptn.f90 b/ext/impl/psb_z_ell_csgetptn.f90 new file mode 100644 index 00000000..97ed7d90 --- /dev/null +++ b/ext/impl/psb_z_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csgetptn + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_ell_csgetrow + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_ell_csmm + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ell_csmv + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ell_csnm1 + + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = done + else + vt(:) = dzero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_z_ell_csnm1 diff --git a/ext/impl/psb_z_ell_csnmi.f90 b/ext/impl/psb_z_ell_csnmi.f90 new file mode 100644 index 00000000..ecbfb1e1 --- /dev/null +++ b/ext/impl/psb_z_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csnmi + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + done + res = max(res,acc) + end do + +end function psb_z_ell_csnmi diff --git a/ext/impl/psb_z_ell_csput.f90 b/ext/impl/psb_z_ell_csput.f90 new file mode 100644 index 00000000..cf45070f --- /dev/null +++ b/ext/impl/psb_z_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csput_a + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_z_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_z_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='z_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_z_ell_srch_upd +end subroutine psb_z_ell_csput_a diff --git a/ext/impl/psb_z_ell_cssm.f90 b/ext/impl/psb_z_ell_cssm.f90 new file mode 100644 index 00000000..2e26c656 --- /dev/null +++ b/ext/impl/psb_z_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_cssm + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_z_ell_cssv + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + complex(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_z_ell_get_diag + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = zone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = zzero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_get_diag diff --git a/ext/impl/psb_z_ell_maxval.f90 b/ext/impl/psb_z_ell_maxval.f90 new file mode 100644 index 00000000..9596f124 --- /dev/null +++ b/ext/impl/psb_z_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_ell_maxval(a) result(res) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_maxval + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_z_ell_maxval diff --git a/ext/impl/psb_z_ell_mold.f90 b/ext/impl/psb_z_ell_mold.f90 new file mode 100644 index 00000000..3e1db6cc --- /dev/null +++ b/ext/impl/psb_z_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_mold(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_mold + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_mold diff --git a/ext/impl/psb_z_ell_print.f90 b/ext/impl/psb_z_ell_print.f90 new file mode 100644 index 00000000..502abb94 --- /dev/null +++ b/ext/impl/psb_z_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_z_ell_print diff --git a/ext/impl/psb_z_ell_reallocate_nz.f90 b/ext/impl/psb_z_ell_reallocate_nz.f90 new file mode 100644 index 00000000..58237508 --- /dev/null +++ b/ext/impl/psb_z_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_reallocate_nz diff --git a/ext/impl/psb_z_ell_reinit.f90 b/ext/impl/psb_z_ell_reinit.f90 new file mode 100644 index 00000000..d73620d8 --- /dev/null +++ b/ext/impl/psb_z_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_reinit(a,clear) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_reinit + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = zzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_reinit diff --git a/ext/impl/psb_z_ell_rowsum.f90 b/ext/impl/psb_z_ell_rowsum.f90 new file mode 100644 index 00000000..60eb70af --- /dev/null +++ b/ext/impl/psb_z_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_rowsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_rowsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = zone + else + d(i) = zzero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_rowsum diff --git a/ext/impl/psb_z_ell_scal.f90 b/ext/impl/psb_z_ell_scal.f90 new file mode 100644 index 00000000..7f2f8944 --- /dev/null +++ b/ext/impl/psb_z_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_scal + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_scal diff --git a/ext/impl/psb_z_ell_scals.f90 b/ext/impl/psb_z_ell_scals.f90 new file mode 100644 index 00000000..4086d8cc --- /dev/null +++ b/ext/impl/psb_z_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_scals(d,a,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_scals + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_scals diff --git a/ext/impl/psb_z_ell_trim.f90 b/ext/impl/psb_z_ell_trim.f90 new file mode 100644 index 00000000..7cc2ed65 --- /dev/null +++ b/ext/impl/psb_z_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_trim(a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_trim + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_trim diff --git a/ext/impl/psb_z_hdia_allocate_mnnz.f90 b/ext/impl/psb_z_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..abed0c58 --- /dev/null +++ b/ext/impl/psb_z_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hdia_allocate_mnnz diff --git a/ext/impl/psb_z_hdia_csmv.f90 b/ext/impl/psb_z_hdia_csmv.f90 new file mode 100644 index 00000000..73d11da6 --- /dev/null +++ b/ext/impl/psb_z_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_csmv + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_z_inner_dia_csmv + +end subroutine psb_z_hdia_csmv diff --git a/ext/impl/psb_z_hdia_mold.f90 b/ext/impl/psb_z_hdia_mold.f90 new file mode 100644 index 00000000..d91bdb35 --- /dev/null +++ b/ext/impl/psb_z_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hdia_mold(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_mold + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hdia_mold diff --git a/ext/impl/psb_z_hdia_print.f90 b/ext/impl/psb_z_hdia_print.f90 new file mode 100644 index 00000000..46f7769d --- /dev/null +++ b/ext/impl/psb_z_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_z_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + complex(psb_dpk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_z_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_z_hdia_print diff --git a/ext/impl/psb_z_hll_aclsum.f90 b/ext/impl/psb_z_hll_aclsum.f90 new file mode 100644 index 00000000..e4add299 --- /dev/null +++ b/ext/impl/psb_z_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_aclsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_aclsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine z_hll_aclsum + +end subroutine psb_z_hll_aclsum diff --git a/ext/impl/psb_z_hll_allocate_mnnz.f90 b/ext/impl/psb_z_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..6ba9d7f1 --- /dev/null +++ b/ext/impl/psb_z_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_allocate_mnnz diff --git a/ext/impl/psb_z_hll_arwsum.f90 b/ext/impl/psb_z_hll_arwsum.f90 new file mode 100644 index 00000000..a6e020fd --- /dev/null +++ b/ext/impl/psb_z_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_arwsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_arwsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine z_hll_arwsum + +end subroutine psb_z_hll_arwsum diff --git a/ext/impl/psb_z_hll_colsum.f90 b/ext/impl/psb_z_hll_colsum.f90 new file mode 100644 index 00000000..196a694e --- /dev/null +++ b/ext/impl/psb_z_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_colsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_colsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine z_hll_colsum + +end subroutine psb_z_hll_colsum diff --git a/ext/impl/psb_z_hll_csgetblk.f90 b/ext/impl/psb_z_hll_csgetblk.f90 new file mode 100644 index 00000000..0cdf1fef --- /dev/null +++ b/ext/impl/psb_z_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csgetblk + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_csgetblk diff --git a/ext/impl/psb_z_hll_csgetptn.f90 b/ext/impl/psb_z_hll_csgetptn.f90 new file mode 100644 index 00000000..9d4c6714 --- /dev/null +++ b/ext/impl/psb_z_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csgetptn + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_hll_csgetrow + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_hll_csmm + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + complex(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_z_hll_csmv + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_z_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == zone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == zone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*conjg(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == zzero) then + if (beta == zzero) then + do i=1,m + y(ir+i-1) = zzero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == zzero) then + do i=1,m + tmp = zzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = zzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_z_hll_csmv_inner + + subroutine psb_z_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_8 + + subroutine psb_z_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_24 + + subroutine psb_z_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_16 + + subroutine psb_z_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_32 + + subroutine psb_z_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_4 + +end subroutine psb_z_hll_csmv diff --git a/ext/impl/psb_z_hll_csnm1.f90 b/ext/impl/psb_z_hll_csnm1.f90 new file mode 100644 index 00000000..eb5c5b6b --- /dev/null +++ b/ext/impl/psb_z_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csnm1 + + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_dpk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = dzero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = done + else + vt = dzero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_z_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_z_hll_csnm1_inner + +end function psb_z_hll_csnm1 diff --git a/ext/impl/psb_z_hll_csnmi.f90 b/ext/impl/psb_z_hll_csnmi.f90 new file mode 100644 index 00000000..6243e5cf --- /dev/null +++ b/ext/impl/psb_z_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csnmi + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='z_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = dzero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_z_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = done + else + tmp = dzero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_z_hll_csnmi_inner + +end function psb_z_hll_csnmi diff --git a/ext/impl/psb_z_hll_csput.f90 b/ext/impl/psb_z_hll_csput.f90 new file mode 100644 index 00000000..e47664c7 --- /dev/null +++ b/ext/impl/psb_z_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csput_a + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_z_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='z_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_z_hll_srch_upd + +end subroutine psb_z_hll_csput_a diff --git a/ext/impl/psb_z_hll_cssm.f90 b/ext/impl/psb_z_hll_cssm.f90 new file mode 100644 index 00000000..ba1aa150 --- /dev/null +++ b/ext/impl/psb_z_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_cssm + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + complex(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_z_hll_cssv + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + complex(psb_dpk_) :: acc + complex(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_z_hll_get_diag + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = zone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_z_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = zzero + end if + end do + + end subroutine psb_z_hll_get_diag_inner + +end subroutine psb_z_hll_get_diag diff --git a/ext/impl/psb_z_hll_maxval.f90 b/ext/impl/psb_z_hll_maxval.f90 new file mode 100644 index 00000000..22258c3a --- /dev/null +++ b/ext/impl/psb_z_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_hll_maxval(a) result(res) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_maxval + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,done) + +end function psb_z_hll_maxval diff --git a/ext/impl/psb_z_hll_mold.f90 b/ext/impl/psb_z_hll_mold.f90 new file mode 100644 index 00000000..e108e9ce --- /dev/null +++ b/ext/impl/psb_z_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_mold(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_mold + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_mold diff --git a/ext/impl/psb_z_hll_print.f90 b/ext/impl/psb_z_hll_print.f90 new file mode 100644 index 00000000..43882264 --- /dev/null +++ b/ext/impl/psb_z_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_z_hll_print diff --git a/ext/impl/psb_z_hll_reallocate_nz.f90 b/ext/impl/psb_z_hll_reallocate_nz.f90 new file mode 100644 index 00000000..23432f9f --- /dev/null +++ b/ext/impl/psb_z_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_reallocate_nz diff --git a/ext/impl/psb_z_hll_reinit.f90 b/ext/impl/psb_z_hll_reinit.f90 new file mode 100644 index 00000000..b6851c61 --- /dev/null +++ b/ext/impl/psb_z_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_reinit(a,clear) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_reinit + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = zzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_reinit diff --git a/ext/impl/psb_z_hll_rowsum.f90 b/ext/impl/psb_z_hll_rowsum.f90 new file mode 100644 index 00000000..027c5b22 --- /dev/null +++ b/ext/impl/psb_z_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_rowsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_rowsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine z_hll_rowsum + +end subroutine psb_z_hll_rowsum diff --git a/ext/impl/psb_z_hll_scal.f90 b/ext/impl/psb_z_hll_scal.f90 new file mode 100644 index 00000000..a11d0da8 --- /dev/null +++ b/ext/impl/psb_z_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_scal + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_z_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: d(*) + complex(psb_dpk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_z_hll_scal_inner + + +end subroutine psb_z_hll_scal diff --git a/ext/impl/psb_z_hll_scals.f90 b/ext/impl/psb_z_hll_scals.f90 new file mode 100644 index 00000000..432f11e6 --- /dev/null +++ b/ext/impl/psb_z_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_scals(d,a,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_scals + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_scals diff --git a/ext/impl/psb_z_mv_dia_from_coo.f90 b/ext/impl/psb_z_mv_dia_from_coo.f90 new file mode 100644 index 00000000..29e27dfc --- /dev/null +++ b/ext/impl/psb_z_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_mv_dia_from_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_dia_from_coo diff --git a/ext/impl/psb_z_mv_dia_to_coo.f90 b/ext/impl/psb_z_mv_dia_to_coo.f90 new file mode 100644 index 00000000..1679c9e0 --- /dev/null +++ b/ext/impl/psb_z_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_mv_dia_to_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_dia_to_coo diff --git a/ext/impl/psb_z_mv_ell_from_coo.f90 b/ext/impl/psb_z_mv_ell_from_coo.f90 new file mode 100644 index 00000000..de39604e --- /dev/null +++ b/ext/impl/psb_z_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_from_coo + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_mv_ell_from_coo diff --git a/ext/impl/psb_z_mv_ell_from_fmt.f90 b/ext/impl/psb_z_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..a2c7c190 --- /dev/null +++ b/ext/impl/psb_z_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_from_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_z_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_ell_from_fmt diff --git a/ext/impl/psb_z_mv_ell_to_coo.f90 b/ext/impl/psb_z_mv_ell_to_coo.f90 new file mode 100644 index 00000000..3f8afb0a --- /dev/null +++ b/ext/impl/psb_z_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_to_coo + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_ell_to_coo diff --git a/ext/impl/psb_z_mv_ell_to_fmt.f90 b/ext/impl/psb_z_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..d34ae80e --- /dev/null +++ b/ext/impl/psb_z_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_to_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_z_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_ell_to_fmt diff --git a/ext/impl/psb_z_mv_hdia_from_coo.f90 b/ext/impl/psb_z_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..b9593f34 --- /dev/null +++ b/ext/impl/psb_z_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_mv_hdia_from_coo + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hdia_from_coo diff --git a/ext/impl/psb_z_mv_hdia_to_coo.f90 b/ext/impl/psb_z_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..f4c8df55 --- /dev/null +++ b/ext/impl/psb_z_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_mv_hdia_to_coo + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_hdia_to_coo diff --git a/ext/impl/psb_z_mv_hll_from_coo.f90 b/ext/impl/psb_z_mv_hll_from_coo.f90 new file mode 100644 index 00000000..abe988b3 --- /dev/null +++ b/ext/impl/psb_z_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_from_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hll_from_coo diff --git a/ext/impl/psb_z_mv_hll_from_fmt.f90 b/ext/impl/psb_z_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..81626aba --- /dev/null +++ b/ext/impl/psb_z_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_from_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_z_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_hll_from_fmt diff --git a/ext/impl/psb_z_mv_hll_to_coo.f90 b/ext/impl/psb_z_mv_hll_to_coo.f90 new file mode 100644 index 00000000..af033004 --- /dev/null +++ b/ext/impl/psb_z_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_to_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_hll_to_coo diff --git a/ext/impl/psb_z_mv_hll_to_fmt.f90 b/ext/impl/psb_z_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..a2fd7027 --- /dev/null +++ b/ext/impl/psb_z_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_to_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_z_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_hll_to_fmt diff --git a/ext/impl/psi_c_convert_dia_from_coo.f90 b/ext/impl/psi_c_convert_dia_from_coo.f90 new file mode 100644 index 00000000..29565748 --- /dev/null +++ b/ext/impl/psi_c_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psi_c_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_c_convert_dia_from_coo diff --git a/ext/impl/psi_c_convert_ell_from_coo.f90 b/ext/impl/psi_c_convert_ell_from_coo.f90 new file mode 100644 index 00000000..b4e0c7e4 --- /dev/null +++ b/ext/impl/psi_c_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psi_c_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_c_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_c_convert_ell_from_coo + diff --git a/ext/impl/psi_c_convert_hll_from_coo.f90 b/ext/impl/psi_c_convert_hll_from_coo.f90 new file mode 100644 index 00000000..2ebb86a6 --- /dev/null +++ b/ext/impl/psi_c_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psi_c_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = czero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_c_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_c_convert_hll_from_coo diff --git a/ext/impl/psi_c_xtr_coo_from_dia.f90 b/ext/impl/psi_c_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..eab82a11 --- /dev/null +++ b/ext/impl/psi_c_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_c_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + complex(psb_spk_), intent(inout) :: val(:) + complex(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_c_xtr_coo_from_dia + diff --git a/ext/impl/psi_c_xtr_dia_from_coo.f90 b/ext/impl/psi_c_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..f72a03df --- /dev/null +++ b/ext/impl/psi_c_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_c_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = czero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_c_xtr_dia_from_coo diff --git a/ext/impl/psi_c_xtr_ell_from_coo.f90 b/ext/impl/psi_c_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..706e6c1f --- /dev/null +++ b/ext/impl/psi_c_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = czero + end do + end do +end subroutine psi_c_xtr_ell_from_coo + diff --git a/ext/impl/psi_d_convert_dia_from_coo.f90 b/ext/impl/psi_d_convert_dia_from_coo.f90 new file mode 100644 index 00000000..5f821967 --- /dev/null +++ b/ext/impl/psi_d_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psi_d_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_d_convert_dia_from_coo diff --git a/ext/impl/psi_d_convert_ell_from_coo.f90 b/ext/impl/psi_d_convert_ell_from_coo.f90 new file mode 100644 index 00000000..51471c19 --- /dev/null +++ b/ext/impl/psi_d_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psi_d_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_d_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_d_convert_ell_from_coo + diff --git a/ext/impl/psi_d_convert_hll_from_coo.f90 b/ext/impl/psi_d_convert_hll_from_coo.f90 new file mode 100644 index 00000000..cb07e52c --- /dev/null +++ b/ext/impl/psi_d_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psi_d_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = dzero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_d_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_d_convert_hll_from_coo diff --git a/ext/impl/psi_d_xtr_coo_from_dia.f90 b/ext/impl/psi_d_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..5fc98b82 --- /dev/null +++ b/ext/impl/psi_d_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_d_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + real(psb_dpk_), intent(inout) :: val(:) + real(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_d_xtr_coo_from_dia + diff --git a/ext/impl/psi_d_xtr_dia_from_coo.f90 b/ext/impl/psi_d_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..cd95b64e --- /dev/null +++ b/ext/impl/psi_d_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_d_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = dzero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_d_xtr_dia_from_coo diff --git a/ext/impl/psi_d_xtr_ell_from_coo.f90 b/ext/impl/psi_d_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..ec520797 --- /dev/null +++ b/ext/impl/psi_d_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = dzero + end do + end do +end subroutine psi_d_xtr_ell_from_coo + diff --git a/ext/impl/psi_s_convert_dia_from_coo.f90 b/ext/impl/psi_s_convert_dia_from_coo.f90 new file mode 100644 index 00000000..d2f58778 --- /dev/null +++ b/ext/impl/psi_s_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psi_s_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_s_convert_dia_from_coo diff --git a/ext/impl/psi_s_convert_ell_from_coo.f90 b/ext/impl/psi_s_convert_ell_from_coo.f90 new file mode 100644 index 00000000..ecdd9b1e --- /dev/null +++ b/ext/impl/psi_s_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psi_s_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_s_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_s_convert_ell_from_coo + diff --git a/ext/impl/psi_s_convert_hll_from_coo.f90 b/ext/impl/psi_s_convert_hll_from_coo.f90 new file mode 100644 index 00000000..dcf6c4e2 --- /dev/null +++ b/ext/impl/psi_s_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psi_s_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = szero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_s_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_s_convert_hll_from_coo diff --git a/ext/impl/psi_s_xtr_coo_from_dia.f90 b/ext/impl/psi_s_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..3745365b --- /dev/null +++ b/ext/impl/psi_s_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_s_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + real(psb_spk_), intent(inout) :: val(:) + real(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_s_xtr_coo_from_dia + diff --git a/ext/impl/psi_s_xtr_dia_from_coo.f90 b/ext/impl/psi_s_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..a8ee7c4b --- /dev/null +++ b/ext/impl/psi_s_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_s_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + real(psb_spk_), intent(in) :: val(:) + real(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = szero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_s_xtr_dia_from_coo diff --git a/ext/impl/psi_s_xtr_ell_from_coo.f90 b/ext/impl/psi_s_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..0bac2ec0 --- /dev/null +++ b/ext/impl/psi_s_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = szero + end do + end do +end subroutine psi_s_xtr_ell_from_coo + diff --git a/ext/impl/psi_z_convert_dia_from_coo.f90 b/ext/impl/psi_z_convert_dia_from_coo.f90 new file mode 100644 index 00000000..ddc9d2fd --- /dev/null +++ b/ext/impl/psi_z_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psi_z_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_z_convert_dia_from_coo diff --git a/ext/impl/psi_z_convert_ell_from_coo.f90 b/ext/impl/psi_z_convert_ell_from_coo.f90 new file mode 100644 index 00000000..3d37c11f --- /dev/null +++ b/ext/impl/psi_z_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psi_z_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_z_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_z_convert_ell_from_coo + diff --git a/ext/impl/psi_z_convert_hll_from_coo.f90 b/ext/impl/psi_z_convert_hll_from_coo.f90 new file mode 100644 index 00000000..bc9fdde1 --- /dev/null +++ b/ext/impl/psi_z_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psi_z_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = zzero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_z_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_z_convert_hll_from_coo diff --git a/ext/impl/psi_z_xtr_coo_from_dia.f90 b/ext/impl/psi_z_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..70d0938f --- /dev/null +++ b/ext/impl/psi_z_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_z_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + complex(psb_dpk_), intent(inout) :: val(:) + complex(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_z_xtr_coo_from_dia + diff --git a/ext/impl/psi_z_xtr_dia_from_coo.f90 b/ext/impl/psi_z_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..6b2542c6 --- /dev/null +++ b/ext/impl/psi_z_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_z_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + complex(psb_dpk_), intent(in) :: val(:) + complex(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = zzero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_z_xtr_dia_from_coo diff --git a/ext/impl/psi_z_xtr_ell_from_coo.f90 b/ext/impl/psi_z_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..7133f2ae --- /dev/null +++ b/ext/impl/psi_z_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = zzero + end do + end do +end subroutine psi_z_xtr_ell_from_coo + diff --git a/ext/psb_c_dia_mat_mod.f90 b/ext/psb_c_dia_mat_mod.f90 new file mode 100644 index 00000000..8311487b --- /dev/null +++ b/ext/psb_c_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_dia_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + complex(psb_spk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => c_dia_get_size + procedure, pass(a) :: get_nzeros => c_dia_get_nzeros + procedure, nopass :: get_fmt => c_dia_get_fmt + procedure, pass(a) :: sizeof => c_dia_sizeof + procedure, pass(a) :: csmm => psb_c_dia_csmm + procedure, pass(a) :: csmv => psb_c_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_c_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_c_dia_cssv + procedure, pass(a) :: scals => psb_c_dia_scals + procedure, pass(a) :: scalv => psb_c_dia_scal + procedure, pass(a) :: maxval => psb_c_dia_maxval + procedure, pass(a) :: rowsum => psb_c_dia_rowsum + procedure, pass(a) :: arwsum => psb_c_dia_arwsum + procedure, pass(a) :: colsum => psb_c_dia_colsum + procedure, pass(a) :: aclsum => psb_c_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_c_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_c_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_c_dia_csput_a + procedure, pass(a) :: get_diag => psb_c_dia_get_diag + procedure, pass(a) :: csgetptn => psb_c_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_c_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => c_dia_get_nz_row + procedure, pass(a) :: reinit => psb_c_dia_reinit + ! procedure, pass(a) :: trim => psb_c_dia_trim + procedure, pass(a) :: print => psb_c_dia_print + procedure, pass(a) :: free => c_dia_free + procedure, pass(a) :: mold => psb_c_dia_mold + + end type psb_c_dia_sparse_mat + + private :: c_dia_get_nzeros, c_dia_free, c_dia_get_fmt, & + & c_dia_sizeof !, c_dia_get_size, c_dia_get_nz_row + + interface + subroutine psb_c_dia_reallocate_nz(nz,a) + import :: psb_c_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dia_sparse_mat), intent(inout) :: a + end subroutine psb_c_dia_reallocate_nz + end interface + + interface + subroutine psb_c_dia_reinit(a,clear) + import :: psb_c_dia_sparse_mat + class(psb_c_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_dia_reinit + end interface + + interface + subroutine psb_c_dia_trim(a) + import :: psb_c_dia_sparse_mat + class(psb_c_dia_sparse_mat), intent(inout) :: a + end subroutine psb_c_dia_trim + end interface + + interface + subroutine psb_c_dia_mold(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_mold + end interface + + interface + subroutine psb_c_dia_allocate_mnnz(m,n,a,nz) + import :: psb_c_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_dia_allocate_mnnz + end interface + + interface + subroutine psb_c_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_dia_print + end interface + + interface + subroutine psb_c_cp_dia_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_dia_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_to_coo + end interface + + interface + subroutine psb_c_cp_dia_from_coo(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_from_coo + end interface + + interface + subroutine psb_c_cp_dia_to_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_to_fmt + end interface + + interface + subroutine psb_c_cp_dia_from_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_from_fmt + end interface + + interface + subroutine psb_c_mv_dia_to_coo(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_to_coo + end interface + + interface + subroutine psb_c_mv_dia_from_coo(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_from_coo + end interface + + interface + subroutine psb_c_mv_dia_to_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_to_fmt + end interface + + interface + subroutine psb_c_mv_dia_from_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_from_fmt + end interface + + interface + subroutine psb_c_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_csput_a + end interface + + interface + subroutine psb_c_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_dia_csgetptn + end interface + + interface + subroutine psb_c_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_dia_csgetrow + end interface + + interface + subroutine psb_c_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_dia_csgetblk + end interface + + interface + subroutine psb_c_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_cssv + subroutine psb_c_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_cssm + end interface + + interface + subroutine psb_c_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_csmv + subroutine psb_c_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_csmm + end interface + + + interface + function psb_c_dia_maxval(a) result(res) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dia_maxval + end interface + + interface + function psb_c_dia_csnmi(a) result(res) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dia_csnmi + end interface + + interface + function psb_c_dia_csnm1(a) result(res) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dia_csnm1 + end interface + + interface + subroutine psb_c_dia_rowsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_rowsum + end interface + + interface + subroutine psb_c_dia_arwsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_arwsum + end interface + + interface + subroutine psb_c_dia_colsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_colsum + end interface + + interface + subroutine psb_c_dia_aclsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_aclsum + end interface + + interface + subroutine psb_c_dia_get_diag(a,d,info) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_get_diag + end interface + + interface + subroutine psb_c_dia_scal(d,a,info,side) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_dia_scal + end interface + + interface + subroutine psb_c_dia_scals(d,a,info) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_c_convert_dia_from_coo(a,tmp,info) + import :: psb_c_dia_sparse_mat, psb_ipk_, psb_c_coo_sparse_mat + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_c_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_dia_sizeof(a) result(res) + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function c_dia_sizeof + + function c_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function c_dia_get_fmt + + function c_dia_get_nzeros(a) result(res) + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function c_dia_get_nzeros + + ! function c_dia_get_size(a) result(res) + ! implicit none + ! class(psb_c_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function c_dia_get_size + + + ! function c_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_c_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function c_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine c_dia_free(a) + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_dia_free + + +end module psb_c_dia_mat_mod diff --git a/ext/psb_c_dns_mat_mod.f90 b/ext/psb_c_dns_mat_mod.f90 new file mode 100644 index 00000000..5e5a191d --- /dev/null +++ b/ext/psb_c_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_c_dns_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_spk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + complex(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => c_dns_get_size + procedure, pass(a) :: get_nzeros => c_dns_get_nzeros + procedure, nopass :: get_fmt => c_dns_get_fmt + procedure, pass(a) :: sizeof => c_dns_sizeof + procedure, pass(a) :: csmv => psb_c_dns_csmv + procedure, pass(a) :: csmm => psb_c_dns_csmm + procedure, pass(a) :: csnmi => psb_c_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_c_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_c_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_c_dns_get_diag + procedure, pass(a) :: csgetrow => psb_c_dns_csgetrow + procedure, pass(a) :: get_nz_row => c_dns_get_nz_row + procedure, pass(a) :: trim => psb_c_dns_trim + procedure, pass(a) :: free => c_dns_free + procedure, pass(a) :: mold => psb_c_dns_mold + + end type psb_c_dns_sparse_mat + + private :: c_dns_get_nzeros, c_dns_free, c_dns_get_fmt, & + & c_dns_get_size, c_dns_sizeof, c_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_c_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_c_dns_reallocate_nz(nz,a) + import :: psb_c_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dns_sparse_mat), intent(inout) :: a + end subroutine psb_c_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_c_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_c_dns_trim(a) + import :: psb_c_dns_sparse_mat + class(psb_c_dns_sparse_mat), intent(inout) :: a + end subroutine psb_c_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_c_dns_sparse_mat + !! \brief Allocate a class(psb_c_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_c_dns_mold(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_c_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_c_dns_allocate_mnnz(m,n,a,nz) + import :: psb_c_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Copy and convert to psb_c_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_c_cp_dns_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_dns_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Copy and convert from psb_c_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_c_cp_dns_from_coo(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Convert to psb_c_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_c_mv_dns_to_coo(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Convert from psb_c_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_c_mv_dns_from_coo(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_c_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_c_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_c_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_c_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_c_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_c_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_c_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_c_dns_csnmi(a) result(res) + import :: psb_c_dns_sparse_mat, psb_spk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_c_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_c_dns_get_diag(a,d,info) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_c_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function c_dns_sizeof(a) result(res) + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function c_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_c_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function c_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function c_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_c_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function c_dns_get_nzeros(a) result(res) + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function c_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_c_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function c_dns_get_size(a) result(res) + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function c_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_c_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function c_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function c_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_c_dns_sparse_mat + !! Name says all + + subroutine c_dns_free(a) + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_dns_free + + +end module psb_c_dns_mat_mod diff --git a/ext/psb_c_ell_mat_mod.f90 b/ext/psb_c_ell_mat_mod.f90 new file mode 100644 index 00000000..8eaf01ba --- /dev/null +++ b/ext/psb_c_ell_mat_mod.f90 @@ -0,0 +1,544 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_ell_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + complex(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => c_ell_is_by_rows + procedure, pass(a) :: get_size => c_ell_get_size + procedure, pass(a) :: get_nzeros => c_ell_get_nzeros + procedure, nopass :: get_fmt => c_ell_get_fmt + procedure, pass(a) :: sizeof => c_ell_sizeof + procedure, pass(a) :: csmm => psb_c_ell_csmm + procedure, pass(a) :: csmv => psb_c_ell_csmv + procedure, pass(a) :: inner_cssm => psb_c_ell_cssm + procedure, pass(a) :: inner_cssv => psb_c_ell_cssv + procedure, pass(a) :: scals => psb_c_ell_scals + procedure, pass(a) :: scalv => psb_c_ell_scal + procedure, pass(a) :: maxval => psb_c_ell_maxval + procedure, pass(a) :: csnmi => psb_c_ell_csnmi + procedure, pass(a) :: csnm1 => psb_c_ell_csnm1 + procedure, pass(a) :: rowsum => psb_c_ell_rowsum + procedure, pass(a) :: arwsum => psb_c_ell_arwsum + procedure, pass(a) :: colsum => psb_c_ell_colsum + procedure, pass(a) :: aclsum => psb_c_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_c_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_c_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_c_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_c_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_c_ell_csput_a + procedure, pass(a) :: get_diag => psb_c_ell_get_diag + procedure, pass(a) :: csgetptn => psb_c_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_c_ell_csgetrow + procedure, pass(a) :: get_nz_row => c_ell_get_nz_row + procedure, pass(a) :: reinit => psb_c_ell_reinit + procedure, pass(a) :: trim => psb_c_ell_trim + procedure, pass(a) :: print => psb_c_ell_print + procedure, pass(a) :: free => c_ell_free + procedure, pass(a) :: mold => psb_c_ell_mold + + end type psb_c_ell_sparse_mat + + private :: c_ell_get_nzeros, c_ell_free, c_ell_get_fmt, & + & c_ell_get_size, c_ell_sizeof, c_ell_get_nz_row, & + & c_ell_is_by_rows + + interface + subroutine psb_c_ell_reallocate_nz(nz,a) + import :: psb_c_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_ell_sparse_mat), intent(inout) :: a + end subroutine psb_c_ell_reallocate_nz + end interface + + interface + subroutine psb_c_ell_reinit(a,clear) + import :: psb_c_ell_sparse_mat + class(psb_c_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_ell_reinit + end interface + + interface + subroutine psb_c_ell_trim(a) + import :: psb_c_ell_sparse_mat + class(psb_c_ell_sparse_mat), intent(inout) :: a + end subroutine psb_c_ell_trim + end interface + + interface + subroutine psb_c_ell_mold(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_mold + end interface + + interface + subroutine psb_c_ell_allocate_mnnz(m,n,a,nz) + import :: psb_c_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_ell_allocate_mnnz + end interface + + interface + subroutine psb_c_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_ell_print + end interface + + interface + subroutine psb_c_cp_ell_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_ell_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_to_coo + end interface + + interface + subroutine psb_c_cp_ell_from_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_from_coo + end interface + + interface + subroutine psb_c_cp_ell_to_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_to_fmt + end interface + + interface + subroutine psb_c_cp_ell_from_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_from_fmt + end interface + + interface + subroutine psb_c_mv_ell_to_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_to_coo + end interface + + interface + subroutine psb_c_mv_ell_from_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_from_coo + end interface + + interface + subroutine psb_c_mv_ell_to_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_to_fmt + end interface + + interface + subroutine psb_c_mv_ell_from_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_from_fmt + end interface + + interface + subroutine psb_c_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_csput_a + end interface + + interface + subroutine psb_c_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_ell_csgetptn + end interface + + interface + subroutine psb_c_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_ell_csgetrow + end interface + + interface + subroutine psb_c_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_ell_csgetblk + end interface + + interface + subroutine psb_c_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_cssv + subroutine psb_c_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_cssm + end interface + + interface + subroutine psb_c_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_csmv + subroutine psb_c_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_csmm + end interface + + + interface + function psb_c_ell_maxval(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_maxval + end interface + + interface + function psb_c_ell_csnmi(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_csnmi + end interface + + interface + function psb_c_ell_csnm1(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_csnm1 + end interface + + interface + subroutine psb_c_ell_rowsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_rowsum + end interface + + interface + subroutine psb_c_ell_arwsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_arwsum + end interface + + interface + subroutine psb_c_ell_colsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_colsum + end interface + + interface + subroutine psb_c_ell_aclsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_aclsum + end interface + + interface + subroutine psb_c_ell_get_diag(a,d,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_get_diag + end interface + + interface + subroutine psb_c_ell_scal(d,a,info,side) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_ell_scal + end interface + + interface + subroutine psb_c_ell_scals(d,a,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_scals + end interface + + interface + subroutine psi_c_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_c_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_ell_is_by_rows(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function c_ell_is_by_rows + + function c_ell_sizeof(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function c_ell_sizeof + + function c_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function c_ell_get_fmt + + function c_ell_get_nzeros(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function c_ell_get_nzeros + + function c_ell_get_size(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function c_ell_get_size + + + function c_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function c_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_ell_free(a) + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_ell_free + + +end module psb_c_ell_mat_mod diff --git a/ext/psb_c_hdia_mat_mod.f90 b/ext/psb_c_hdia_mat_mod.f90 new file mode 100644 index 00000000..fbac05de --- /dev/null +++ b/ext/psb_c_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_c_hdia_mat_mod + + use psb_c_base_mat_mod + + + type, extends(psb_c_base_sparse_mat) :: psb_c_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + complex(psb_spk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => c_hdia_get_size + procedure, pass(a) :: get_nzeros => c_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => c_hdia_set_nzeros + procedure, nopass :: get_fmt => c_hdia_get_fmt + procedure, pass(a) :: sizeof => c_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_c_hdia_csmm + procedure, pass(a) :: csmv => psb_c_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_c_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_c_hdia_cssv + ! procedure, pass(a) :: scals => psb_c_hdia_scals + ! procedure, pass(a) :: scalv => psb_c_hdia_scal + ! procedure, pass(a) :: maxval => psb_c_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_c_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_c_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_c_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_c_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_c_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_c_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_c_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_c_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_c_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_c_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_c_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_c_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_c_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_c_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => c_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_c_hdia_reinit + ! procedure, pass(a) :: trim => psb_c_hdia_trim + procedure, pass(a) :: print => psb_c_hdia_print + procedure, pass(a) :: free => c_hdia_free + procedure, pass(a) :: mold => psb_c_hdia_mold + + end type psb_c_hdia_sparse_mat + + private :: c_hdia_get_nzeros, c_hdia_set_nzeros, c_hdia_free, & + & c_hdia_get_fmt, c_hdia_sizeof +!!$ & +!!$ & c_hdia_get_nz_row c_hdia_get_size, + +!!$ interface +!!$ subroutine psb_c_hdia_reallocate_nz(nz,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_reinit(a,clear) +!!$ import :: psb_c_hdia_sparse_mat +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_c_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_trim(a) +!!$ import :: psb_c_hdia_sparse_mat +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdia_trim +!!$ end interface + + interface + subroutine psb_c_hdia_mold(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdia_mold + end interface + + interface + subroutine psb_c_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_c_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hdia_allocate_mnnz + end interface + + interface + subroutine psb_c_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_hdia_print + end interface + + interface + subroutine psb_c_cp_hdia_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_hdia_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdia_to_coo + end interface + + interface + subroutine psb_c_cp_hdia_from_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_c_mv_hdia_to_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdia_to_coo + end interface + + interface + subroutine psb_c_mv_hdia_from_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ complex(psb_spk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_c_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_cssv +!!$ subroutine psb_c_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_cssm +!!$ end interface + + interface + subroutine psb_c_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hdia_csmv +!!$ subroutine psb_c_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_c_hdia_maxval(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_c_hdia_csnmi(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_c_hdia_csnm1(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_rowsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_arwsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_colsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_aclsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_get_diag(a,d,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_scal(d,a,info,side) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_hdia_scals(d,a,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * (2*psb_sizeof_sp) + + end function c_hdia_sizeof + + function c_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function c_hdia_get_fmt + + function c_hdia_get_nzeros(a) result(res) + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function c_hdia_get_nzeros + + subroutine c_hdia_set_nzeros(a,nz) + implicit none + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine c_hdia_set_nzeros + + ! function c_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_c_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function c_hdia_get_size + + + ! function c_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_c_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function c_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine c_hdia_free(a) + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_hdia_free + + +end module psb_c_hdia_mat_mod diff --git a/ext/psb_c_hll_mat_mod.f90 b/ext/psb_c_hll_mat_mod.f90 new file mode 100644 index 00000000..966b60f5 --- /dev/null +++ b/ext/psb_c_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_hll_mat_mod + + use psb_c_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + complex(psb_spk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => c_hll_get_hksz + procedure, pass(a) :: set_hksz => c_hll_set_hksz + procedure, pass(a) :: get_size => c_hll_get_size + procedure, pass(a) :: set_nzeros => c_hll_set_nzeros + procedure, pass(a) :: get_nzeros => c_hll_get_nzeros + procedure, nopass :: get_fmt => c_hll_get_fmt + procedure, pass(a) :: sizeof => c_hll_sizeof + procedure, pass(a) :: csmm => psb_c_hll_csmm + procedure, pass(a) :: csmv => psb_c_hll_csmv + procedure, pass(a) :: inner_cssm => psb_c_hll_cssm + procedure, pass(a) :: inner_cssv => psb_c_hll_cssv + procedure, pass(a) :: scals => psb_c_hll_scals + procedure, pass(a) :: scalv => psb_c_hll_scal + procedure, pass(a) :: maxval => psb_c_hll_maxval + procedure, pass(a) :: csnmi => psb_c_hll_csnmi + procedure, pass(a) :: csnm1 => psb_c_hll_csnm1 + procedure, pass(a) :: rowsum => psb_c_hll_rowsum + procedure, pass(a) :: arwsum => psb_c_hll_arwsum + procedure, pass(a) :: colsum => psb_c_hll_colsum + procedure, pass(a) :: aclsum => psb_c_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_c_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_c_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_c_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_c_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_c_hll_csput_a + procedure, pass(a) :: get_diag => psb_c_hll_get_diag + procedure, pass(a) :: csgetptn => psb_c_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_c_hll_csgetrow + procedure, pass(a) :: get_nz_row => c_hll_get_nz_row + procedure, pass(a) :: reinit => psb_c_hll_reinit + procedure, pass(a) :: print => psb_c_hll_print + procedure, pass(a) :: free => c_hll_free + procedure, pass(a) :: mold => psb_c_hll_mold + + end type psb_c_hll_sparse_mat + + private :: c_hll_get_nzeros, c_hll_free, c_hll_get_fmt, & + & c_hll_get_size, c_hll_sizeof, c_hll_get_nz_row, & + & c_hll_set_nzeros, c_hll_get_hksz, c_hll_set_hksz + + interface + subroutine psb_c_hll_reallocate_nz(nz,a) + import :: psb_c_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hll_sparse_mat), intent(inout) :: a + end subroutine psb_c_hll_reallocate_nz + end interface + + interface + subroutine psb_c_hll_reinit(a,clear) + import :: psb_c_hll_sparse_mat + class(psb_c_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_hll_reinit + end interface + + interface + subroutine psb_c_hll_mold(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_mold + end interface + + interface + subroutine psb_c_hll_allocate_mnnz(m,n,a,nz) + import :: psb_c_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hll_allocate_mnnz + end interface + + interface + subroutine psb_c_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_hll_print + end interface + + interface + subroutine psb_c_cp_hll_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_hll_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_to_coo + end interface + + interface + subroutine psb_c_cp_hll_from_coo(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_from_coo + end interface + + interface + subroutine psb_c_cp_hll_to_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_to_fmt + end interface + + interface + subroutine psb_c_cp_hll_from_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_from_fmt + end interface + + interface + subroutine psb_c_mv_hll_to_coo(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_to_coo + end interface + + interface + subroutine psb_c_mv_hll_from_coo(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_from_coo + end interface + + interface + subroutine psb_c_mv_hll_to_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_to_fmt + end interface + + interface + subroutine psb_c_mv_hll_from_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_from_fmt + end interface + + interface + subroutine psb_c_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_csput_a + end interface + + interface + subroutine psb_c_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_hll_csgetptn + end interface + + interface + subroutine psb_c_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_hll_csgetrow + end interface + + interface + subroutine psb_c_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_hll_csgetblk + end interface + + interface + subroutine psb_c_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_cssv + subroutine psb_c_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_cssm + end interface + + interface + subroutine psb_c_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_csmv + subroutine psb_c_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_csmm + end interface + + + interface + function psb_c_hll_maxval(a) result(res) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_hll_maxval + end interface + + interface + function psb_c_hll_csnmi(a) result(res) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_hll_csnmi + end interface + + interface + function psb_c_hll_csnm1(a) result(res) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_hll_csnm1 + end interface + + interface + subroutine psb_c_hll_rowsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_rowsum + end interface + + interface + subroutine psb_c_hll_arwsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_arwsum + end interface + + interface + subroutine psb_c_hll_colsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_colsum + end interface + + interface + subroutine psb_c_hll_aclsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_aclsum + end interface + + interface + subroutine psb_c_hll_get_diag(a,d,info) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_get_diag + end interface + + interface + subroutine psb_c_hll_scal(d,a,info,side) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_hll_scal + end interface + + interface + subroutine psb_c_hll_scals(d,a,info) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_c_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_c_hll_sparse_mat, psb_ipk_, psb_c_coo_sparse_mat + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_c_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hll_sizeof(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function c_hll_sizeof + + function c_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function c_hll_get_fmt + + subroutine c_hll_set_nzeros(a,n) + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine c_hll_set_nzeros + + function c_hll_get_nzeros(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function c_hll_get_nzeros + + function c_hll_get_size(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function c_hll_get_size + + + + function c_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function c_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_hll_free(a) + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine c_hll_free + + subroutine c_hll_set_hksz(a,n) + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine c_hll_set_hksz + + function c_hll_get_hksz(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function c_hll_get_hksz + +end module psb_c_hll_mat_mod diff --git a/ext/psb_d_dia_mat_mod.f90 b/ext/psb_d_dia_mat_mod.f90 new file mode 100644 index 00000000..7df615ac --- /dev/null +++ b/ext/psb_d_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_dia_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => d_dia_get_size + procedure, pass(a) :: get_nzeros => d_dia_get_nzeros + procedure, nopass :: get_fmt => d_dia_get_fmt + procedure, pass(a) :: sizeof => d_dia_sizeof + procedure, pass(a) :: csmm => psb_d_dia_csmm + procedure, pass(a) :: csmv => psb_d_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_d_dia_cssv + procedure, pass(a) :: scals => psb_d_dia_scals + procedure, pass(a) :: scalv => psb_d_dia_scal + procedure, pass(a) :: maxval => psb_d_dia_maxval + procedure, pass(a) :: rowsum => psb_d_dia_rowsum + procedure, pass(a) :: arwsum => psb_d_dia_arwsum + procedure, pass(a) :: colsum => psb_d_dia_colsum + procedure, pass(a) :: aclsum => psb_d_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_d_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_d_dia_csput_a + procedure, pass(a) :: get_diag => psb_d_dia_get_diag + procedure, pass(a) :: csgetptn => psb_d_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_d_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => d_dia_get_nz_row + procedure, pass(a) :: reinit => psb_d_dia_reinit + ! procedure, pass(a) :: trim => psb_d_dia_trim + procedure, pass(a) :: print => psb_d_dia_print + procedure, pass(a) :: free => d_dia_free + procedure, pass(a) :: mold => psb_d_dia_mold + + end type psb_d_dia_sparse_mat + + private :: d_dia_get_nzeros, d_dia_free, d_dia_get_fmt, & + & d_dia_sizeof !, d_dia_get_size, d_dia_get_nz_row + + interface + subroutine psb_d_dia_reallocate_nz(nz,a) + import :: psb_d_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dia_sparse_mat), intent(inout) :: a + end subroutine psb_d_dia_reallocate_nz + end interface + + interface + subroutine psb_d_dia_reinit(a,clear) + import :: psb_d_dia_sparse_mat + class(psb_d_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_dia_reinit + end interface + + interface + subroutine psb_d_dia_trim(a) + import :: psb_d_dia_sparse_mat + class(psb_d_dia_sparse_mat), intent(inout) :: a + end subroutine psb_d_dia_trim + end interface + + interface + subroutine psb_d_dia_mold(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_mold + end interface + + interface + subroutine psb_d_dia_allocate_mnnz(m,n,a,nz) + import :: psb_d_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_dia_allocate_mnnz + end interface + + interface + subroutine psb_d_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_dia_print + end interface + + interface + subroutine psb_d_cp_dia_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_dia_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_to_coo + end interface + + interface + subroutine psb_d_cp_dia_from_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_from_coo + end interface + + interface + subroutine psb_d_cp_dia_to_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_to_fmt + end interface + + interface + subroutine psb_d_cp_dia_from_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_from_fmt + end interface + + interface + subroutine psb_d_mv_dia_to_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_to_coo + end interface + + interface + subroutine psb_d_mv_dia_from_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_from_coo + end interface + + interface + subroutine psb_d_mv_dia_to_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_to_fmt + end interface + + interface + subroutine psb_d_mv_dia_from_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_from_fmt + end interface + + interface + subroutine psb_d_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_csput_a + end interface + + interface + subroutine psb_d_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_dia_csgetptn + end interface + + interface + subroutine psb_d_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_dia_csgetrow + end interface + + interface + subroutine psb_d_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_dia_csgetblk + end interface + + interface + subroutine psb_d_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_cssv + subroutine psb_d_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_cssm + end interface + + interface + subroutine psb_d_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_csmv + subroutine psb_d_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_csmm + end interface + + + interface + function psb_d_dia_maxval(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_maxval + end interface + + interface + function psb_d_dia_csnmi(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_csnmi + end interface + + interface + function psb_d_dia_csnm1(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_csnm1 + end interface + + interface + subroutine psb_d_dia_rowsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_rowsum + end interface + + interface + subroutine psb_d_dia_arwsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_arwsum + end interface + + interface + subroutine psb_d_dia_colsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_colsum + end interface + + interface + subroutine psb_d_dia_aclsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_aclsum + end interface + + interface + subroutine psb_d_dia_get_diag(a,d,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_get_diag + end interface + + interface + subroutine psb_d_dia_scal(d,a,info,side) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_dia_scal + end interface + + interface + subroutine psb_d_dia_scals(d,a,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_d_convert_dia_from_coo(a,tmp,info) + import :: psb_d_dia_sparse_mat, psb_ipk_, psb_d_coo_sparse_mat + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_d_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_dia_sizeof(a) result(res) + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function d_dia_sizeof + + function d_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function d_dia_get_fmt + + function d_dia_get_nzeros(a) result(res) + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function d_dia_get_nzeros + + ! function d_dia_get_size(a) result(res) + ! implicit none + ! class(psb_d_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function d_dia_get_size + + + ! function d_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_d_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function d_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine d_dia_free(a) + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_dia_free + + +end module psb_d_dia_mat_mod diff --git a/ext/psb_d_dns_mat_mod.f90 b/ext/psb_d_dns_mat_mod.f90 new file mode 100644 index 00000000..f8c977bc --- /dev/null +++ b/ext/psb_d_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_d_dns_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_dpk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + real(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => d_dns_get_size + procedure, pass(a) :: get_nzeros => d_dns_get_nzeros + procedure, nopass :: get_fmt => d_dns_get_fmt + procedure, pass(a) :: sizeof => d_dns_sizeof + procedure, pass(a) :: csmv => psb_d_dns_csmv + procedure, pass(a) :: csmm => psb_d_dns_csmm + procedure, pass(a) :: csnmi => psb_d_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_d_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_d_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_d_dns_get_diag + procedure, pass(a) :: csgetrow => psb_d_dns_csgetrow + procedure, pass(a) :: get_nz_row => d_dns_get_nz_row + procedure, pass(a) :: trim => psb_d_dns_trim + procedure, pass(a) :: free => d_dns_free + procedure, pass(a) :: mold => psb_d_dns_mold + + end type psb_d_dns_sparse_mat + + private :: d_dns_get_nzeros, d_dns_free, d_dns_get_fmt, & + & d_dns_get_size, d_dns_sizeof, d_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_d_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_d_dns_reallocate_nz(nz,a) + import :: psb_d_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dns_sparse_mat), intent(inout) :: a + end subroutine psb_d_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_d_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_d_dns_trim(a) + import :: psb_d_dns_sparse_mat + class(psb_d_dns_sparse_mat), intent(inout) :: a + end subroutine psb_d_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_d_dns_sparse_mat + !! \brief Allocate a class(psb_d_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_dns_mold(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_d_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_d_dns_allocate_mnnz(m,n,a,nz) + import :: psb_d_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Copy and convert to psb_d_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_cp_dns_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_dns_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Copy and convert from psb_d_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_d_cp_dns_from_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Convert to psb_d_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_mv_dns_to_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Convert from psb_d_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_d_mv_dns_from_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_d_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_d_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_d_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_d_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_d_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_d_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_d_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_d_dns_csnmi(a) result(res) + import :: psb_d_dns_sparse_mat, psb_dpk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_d_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_d_dns_get_diag(a,d,info) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_d_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function d_dns_sizeof(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function d_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_d_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function d_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function d_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_d_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function d_dns_get_nzeros(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function d_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_d_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function d_dns_get_size(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function d_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_d_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function d_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function d_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_d_dns_sparse_mat + !! Name says all + + subroutine d_dns_free(a) + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_dns_free + + +end module psb_d_dns_mat_mod diff --git a/ext/psb_d_ell_mat_mod.f90 b/ext/psb_d_ell_mat_mod.f90 new file mode 100644 index 00000000..3e34d63e --- /dev/null +++ b/ext/psb_d_ell_mat_mod.f90 @@ -0,0 +1,544 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_ell_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => d_ell_is_by_rows + procedure, pass(a) :: get_size => d_ell_get_size + procedure, pass(a) :: get_nzeros => d_ell_get_nzeros + procedure, nopass :: get_fmt => d_ell_get_fmt + procedure, pass(a) :: sizeof => d_ell_sizeof + procedure, pass(a) :: csmm => psb_d_ell_csmm + procedure, pass(a) :: csmv => psb_d_ell_csmv + procedure, pass(a) :: inner_cssm => psb_d_ell_cssm + procedure, pass(a) :: inner_cssv => psb_d_ell_cssv + procedure, pass(a) :: scals => psb_d_ell_scals + procedure, pass(a) :: scalv => psb_d_ell_scal + procedure, pass(a) :: maxval => psb_d_ell_maxval + procedure, pass(a) :: csnmi => psb_d_ell_csnmi + procedure, pass(a) :: csnm1 => psb_d_ell_csnm1 + procedure, pass(a) :: rowsum => psb_d_ell_rowsum + procedure, pass(a) :: arwsum => psb_d_ell_arwsum + procedure, pass(a) :: colsum => psb_d_ell_colsum + procedure, pass(a) :: aclsum => psb_d_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_d_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_d_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_d_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_d_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_d_ell_csput_a + procedure, pass(a) :: get_diag => psb_d_ell_get_diag + procedure, pass(a) :: csgetptn => psb_d_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_d_ell_csgetrow + procedure, pass(a) :: get_nz_row => d_ell_get_nz_row + procedure, pass(a) :: reinit => psb_d_ell_reinit + procedure, pass(a) :: trim => psb_d_ell_trim + procedure, pass(a) :: print => psb_d_ell_print + procedure, pass(a) :: free => d_ell_free + procedure, pass(a) :: mold => psb_d_ell_mold + + end type psb_d_ell_sparse_mat + + private :: d_ell_get_nzeros, d_ell_free, d_ell_get_fmt, & + & d_ell_get_size, d_ell_sizeof, d_ell_get_nz_row, & + & d_ell_is_by_rows + + interface + subroutine psb_d_ell_reallocate_nz(nz,a) + import :: psb_d_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_ell_sparse_mat), intent(inout) :: a + end subroutine psb_d_ell_reallocate_nz + end interface + + interface + subroutine psb_d_ell_reinit(a,clear) + import :: psb_d_ell_sparse_mat + class(psb_d_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_ell_reinit + end interface + + interface + subroutine psb_d_ell_trim(a) + import :: psb_d_ell_sparse_mat + class(psb_d_ell_sparse_mat), intent(inout) :: a + end subroutine psb_d_ell_trim + end interface + + interface + subroutine psb_d_ell_mold(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_mold + end interface + + interface + subroutine psb_d_ell_allocate_mnnz(m,n,a,nz) + import :: psb_d_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_ell_allocate_mnnz + end interface + + interface + subroutine psb_d_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_ell_print + end interface + + interface + subroutine psb_d_cp_ell_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_ell_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_to_coo + end interface + + interface + subroutine psb_d_cp_ell_from_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_from_coo + end interface + + interface + subroutine psb_d_cp_ell_to_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_to_fmt + end interface + + interface + subroutine psb_d_cp_ell_from_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_from_fmt + end interface + + interface + subroutine psb_d_mv_ell_to_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_to_coo + end interface + + interface + subroutine psb_d_mv_ell_from_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_from_coo + end interface + + interface + subroutine psb_d_mv_ell_to_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_to_fmt + end interface + + interface + subroutine psb_d_mv_ell_from_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_from_fmt + end interface + + interface + subroutine psb_d_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_csput_a + end interface + + interface + subroutine psb_d_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_ell_csgetptn + end interface + + interface + subroutine psb_d_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_ell_csgetrow + end interface + + interface + subroutine psb_d_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_ell_csgetblk + end interface + + interface + subroutine psb_d_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_cssv + subroutine psb_d_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_cssm + end interface + + interface + subroutine psb_d_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_csmv + subroutine psb_d_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_csmm + end interface + + + interface + function psb_d_ell_maxval(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_maxval + end interface + + interface + function psb_d_ell_csnmi(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_csnmi + end interface + + interface + function psb_d_ell_csnm1(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_csnm1 + end interface + + interface + subroutine psb_d_ell_rowsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_rowsum + end interface + + interface + subroutine psb_d_ell_arwsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_arwsum + end interface + + interface + subroutine psb_d_ell_colsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_colsum + end interface + + interface + subroutine psb_d_ell_aclsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_aclsum + end interface + + interface + subroutine psb_d_ell_get_diag(a,d,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_get_diag + end interface + + interface + subroutine psb_d_ell_scal(d,a,info,side) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_ell_scal + end interface + + interface + subroutine psb_d_ell_scals(d,a,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_scals + end interface + + interface + subroutine psi_d_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_d_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_ell_is_by_rows(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function d_ell_is_by_rows + + function d_ell_sizeof(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function d_ell_sizeof + + function d_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function d_ell_get_fmt + + function d_ell_get_nzeros(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function d_ell_get_nzeros + + function d_ell_get_size(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function d_ell_get_size + + + function d_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function d_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_ell_free(a) + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_ell_free + + +end module psb_d_ell_mat_mod diff --git a/ext/psb_d_hdia_mat_mod.f90 b/ext/psb_d_hdia_mat_mod.f90 new file mode 100644 index 00000000..25bc6898 --- /dev/null +++ b/ext/psb_d_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_d_hdia_mat_mod + + use psb_d_base_mat_mod + + + type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + real(psb_dpk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => d_hdia_get_size + procedure, pass(a) :: get_nzeros => d_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => d_hdia_set_nzeros + procedure, nopass :: get_fmt => d_hdia_get_fmt + procedure, pass(a) :: sizeof => d_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_d_hdia_csmm + procedure, pass(a) :: csmv => psb_d_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_d_hdia_cssv + ! procedure, pass(a) :: scals => psb_d_hdia_scals + ! procedure, pass(a) :: scalv => psb_d_hdia_scal + ! procedure, pass(a) :: maxval => psb_d_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_d_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_d_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_d_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_d_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_d_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_d_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_d_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_d_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_d_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_d_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_d_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_d_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => d_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_d_hdia_reinit + ! procedure, pass(a) :: trim => psb_d_hdia_trim + procedure, pass(a) :: print => psb_d_hdia_print + procedure, pass(a) :: free => d_hdia_free + procedure, pass(a) :: mold => psb_d_hdia_mold + + end type psb_d_hdia_sparse_mat + + private :: d_hdia_get_nzeros, d_hdia_set_nzeros, d_hdia_free, & + & d_hdia_get_fmt, d_hdia_sizeof +!!$ & +!!$ & d_hdia_get_nz_row d_hdia_get_size, + +!!$ interface +!!$ subroutine psb_d_hdia_reallocate_nz(nz,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_reinit(a,clear) +!!$ import :: psb_d_hdia_sparse_mat +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_d_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_trim(a) +!!$ import :: psb_d_hdia_sparse_mat +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_hdia_trim +!!$ end interface + + interface + subroutine psb_d_hdia_mold(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdia_mold + end interface + + interface + subroutine psb_d_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_d_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hdia_allocate_mnnz + end interface + + interface + subroutine psb_d_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_hdia_print + end interface + + interface + subroutine psb_d_cp_hdia_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_hdia_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdia_to_coo + end interface + + interface + subroutine psb_d_cp_hdia_from_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_d_mv_hdia_to_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdia_to_coo + end interface + + interface + subroutine psb_d_mv_hdia_from_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ real(psb_dpk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_d_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_cssv +!!$ subroutine psb_d_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_cssm +!!$ end interface + + interface + subroutine psb_d_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hdia_csmv +!!$ subroutine psb_d_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_d_hdia_maxval(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_d_hdia_csnmi(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_d_hdia_csnm1(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_rowsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_arwsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_colsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_aclsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_get_diag(a,d,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_scal(d,a,info,side) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_d_hdia_scals(d,a,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * psb_sizeof_dp + + end function d_hdia_sizeof + + function d_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function d_hdia_get_fmt + + function d_hdia_get_nzeros(a) result(res) + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function d_hdia_get_nzeros + + subroutine d_hdia_set_nzeros(a,nz) + implicit none + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine d_hdia_set_nzeros + + ! function d_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_d_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function d_hdia_get_size + + + ! function d_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_d_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function d_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine d_hdia_free(a) + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_hdia_free + + +end module psb_d_hdia_mat_mod diff --git a/ext/psb_d_hll_mat_mod.f90 b/ext/psb_d_hll_mat_mod.f90 new file mode 100644 index 00000000..acc3b312 --- /dev/null +++ b/ext/psb_d_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_hll_mat_mod + + use psb_d_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => d_hll_get_hksz + procedure, pass(a) :: set_hksz => d_hll_set_hksz + procedure, pass(a) :: get_size => d_hll_get_size + procedure, pass(a) :: set_nzeros => d_hll_set_nzeros + procedure, pass(a) :: get_nzeros => d_hll_get_nzeros + procedure, nopass :: get_fmt => d_hll_get_fmt + procedure, pass(a) :: sizeof => d_hll_sizeof + procedure, pass(a) :: csmm => psb_d_hll_csmm + procedure, pass(a) :: csmv => psb_d_hll_csmv + procedure, pass(a) :: inner_cssm => psb_d_hll_cssm + procedure, pass(a) :: inner_cssv => psb_d_hll_cssv + procedure, pass(a) :: scals => psb_d_hll_scals + procedure, pass(a) :: scalv => psb_d_hll_scal + procedure, pass(a) :: maxval => psb_d_hll_maxval + procedure, pass(a) :: csnmi => psb_d_hll_csnmi + procedure, pass(a) :: csnm1 => psb_d_hll_csnm1 + procedure, pass(a) :: rowsum => psb_d_hll_rowsum + procedure, pass(a) :: arwsum => psb_d_hll_arwsum + procedure, pass(a) :: colsum => psb_d_hll_colsum + procedure, pass(a) :: aclsum => psb_d_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_d_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_d_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_d_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_d_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_d_hll_csput_a + procedure, pass(a) :: get_diag => psb_d_hll_get_diag + procedure, pass(a) :: csgetptn => psb_d_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_d_hll_csgetrow + procedure, pass(a) :: get_nz_row => d_hll_get_nz_row + procedure, pass(a) :: reinit => psb_d_hll_reinit + procedure, pass(a) :: print => psb_d_hll_print + procedure, pass(a) :: free => d_hll_free + procedure, pass(a) :: mold => psb_d_hll_mold + + end type psb_d_hll_sparse_mat + + private :: d_hll_get_nzeros, d_hll_free, d_hll_get_fmt, & + & d_hll_get_size, d_hll_sizeof, d_hll_get_nz_row, & + & d_hll_set_nzeros, d_hll_get_hksz, d_hll_set_hksz + + interface + subroutine psb_d_hll_reallocate_nz(nz,a) + import :: psb_d_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hll_sparse_mat), intent(inout) :: a + end subroutine psb_d_hll_reallocate_nz + end interface + + interface + subroutine psb_d_hll_reinit(a,clear) + import :: psb_d_hll_sparse_mat + class(psb_d_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_hll_reinit + end interface + + interface + subroutine psb_d_hll_mold(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_mold + end interface + + interface + subroutine psb_d_hll_allocate_mnnz(m,n,a,nz) + import :: psb_d_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hll_allocate_mnnz + end interface + + interface + subroutine psb_d_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_hll_print + end interface + + interface + subroutine psb_d_cp_hll_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_hll_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_to_coo + end interface + + interface + subroutine psb_d_cp_hll_from_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_from_coo + end interface + + interface + subroutine psb_d_cp_hll_to_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_to_fmt + end interface + + interface + subroutine psb_d_cp_hll_from_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_from_fmt + end interface + + interface + subroutine psb_d_mv_hll_to_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_to_coo + end interface + + interface + subroutine psb_d_mv_hll_from_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_from_coo + end interface + + interface + subroutine psb_d_mv_hll_to_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_to_fmt + end interface + + interface + subroutine psb_d_mv_hll_from_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_from_fmt + end interface + + interface + subroutine psb_d_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_csput_a + end interface + + interface + subroutine psb_d_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_hll_csgetptn + end interface + + interface + subroutine psb_d_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_hll_csgetrow + end interface + + interface + subroutine psb_d_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_hll_csgetblk + end interface + + interface + subroutine psb_d_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_cssv + subroutine psb_d_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_cssm + end interface + + interface + subroutine psb_d_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_csmv + subroutine psb_d_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_csmm + end interface + + + interface + function psb_d_hll_maxval(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_maxval + end interface + + interface + function psb_d_hll_csnmi(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_csnmi + end interface + + interface + function psb_d_hll_csnm1(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_csnm1 + end interface + + interface + subroutine psb_d_hll_rowsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_rowsum + end interface + + interface + subroutine psb_d_hll_arwsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_arwsum + end interface + + interface + subroutine psb_d_hll_colsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_colsum + end interface + + interface + subroutine psb_d_hll_aclsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_aclsum + end interface + + interface + subroutine psb_d_hll_get_diag(a,d,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_get_diag + end interface + + interface + subroutine psb_d_hll_scal(d,a,info,side) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_hll_scal + end interface + + interface + subroutine psb_d_hll_scals(d,a,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_d_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_d_hll_sparse_mat, psb_ipk_, psb_d_coo_sparse_mat + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_d_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hll_sizeof(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function d_hll_sizeof + + function d_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function d_hll_get_fmt + + subroutine d_hll_set_nzeros(a,n) + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine d_hll_set_nzeros + + function d_hll_get_nzeros(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function d_hll_get_nzeros + + function d_hll_get_size(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function d_hll_get_size + + + + function d_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function d_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_hll_free(a) + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine d_hll_free + + subroutine d_hll_set_hksz(a,n) + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine d_hll_set_hksz + + function d_hll_get_hksz(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function d_hll_get_hksz + +end module psb_d_hll_mat_mod diff --git a/ext/psb_ext_mod.F90 b/ext/psb_ext_mod.F90 new file mode 100644 index 00000000..b1dbdb59 --- /dev/null +++ b/ext/psb_ext_mod.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_ext_mod + use psb_const_mod + use psi_ext_util_mod + + use psb_s_dns_mat_mod + use psb_d_dns_mat_mod + use psb_c_dns_mat_mod + use psb_z_dns_mat_mod + + use psb_d_ell_mat_mod + use psb_s_ell_mat_mod + use psb_z_ell_mat_mod + use psb_c_ell_mat_mod + + use psb_s_hll_mat_mod + use psb_d_hll_mat_mod + use psb_c_hll_mat_mod + use psb_z_hll_mat_mod + + use psb_s_dia_mat_mod + use psb_d_dia_mat_mod + use psb_c_dia_mat_mod + use psb_z_dia_mat_mod + + use psb_s_hdia_mat_mod + use psb_d_hdia_mat_mod + use psb_c_hdia_mat_mod + use psb_z_hdia_mat_mod + +#ifdef HAVE_RSB + use psb_d_rsb_mat_mod +#endif +end module psb_ext_mod diff --git a/ext/psb_s_dia_mat_mod.f90 b/ext/psb_s_dia_mat_mod.f90 new file mode 100644 index 00000000..3a11d959 --- /dev/null +++ b/ext/psb_s_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_dia_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_spk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => s_dia_get_size + procedure, pass(a) :: get_nzeros => s_dia_get_nzeros + procedure, nopass :: get_fmt => s_dia_get_fmt + procedure, pass(a) :: sizeof => s_dia_sizeof + procedure, pass(a) :: csmm => psb_s_dia_csmm + procedure, pass(a) :: csmv => psb_s_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_s_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_s_dia_cssv + procedure, pass(a) :: scals => psb_s_dia_scals + procedure, pass(a) :: scalv => psb_s_dia_scal + procedure, pass(a) :: maxval => psb_s_dia_maxval + procedure, pass(a) :: rowsum => psb_s_dia_rowsum + procedure, pass(a) :: arwsum => psb_s_dia_arwsum + procedure, pass(a) :: colsum => psb_s_dia_colsum + procedure, pass(a) :: aclsum => psb_s_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_s_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_s_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_s_dia_csput_a + procedure, pass(a) :: get_diag => psb_s_dia_get_diag + procedure, pass(a) :: csgetptn => psb_s_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_s_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => s_dia_get_nz_row + procedure, pass(a) :: reinit => psb_s_dia_reinit + ! procedure, pass(a) :: trim => psb_s_dia_trim + procedure, pass(a) :: print => psb_s_dia_print + procedure, pass(a) :: free => s_dia_free + procedure, pass(a) :: mold => psb_s_dia_mold + + end type psb_s_dia_sparse_mat + + private :: s_dia_get_nzeros, s_dia_free, s_dia_get_fmt, & + & s_dia_sizeof !, s_dia_get_size, s_dia_get_nz_row + + interface + subroutine psb_s_dia_reallocate_nz(nz,a) + import :: psb_s_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dia_sparse_mat), intent(inout) :: a + end subroutine psb_s_dia_reallocate_nz + end interface + + interface + subroutine psb_s_dia_reinit(a,clear) + import :: psb_s_dia_sparse_mat + class(psb_s_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_dia_reinit + end interface + + interface + subroutine psb_s_dia_trim(a) + import :: psb_s_dia_sparse_mat + class(psb_s_dia_sparse_mat), intent(inout) :: a + end subroutine psb_s_dia_trim + end interface + + interface + subroutine psb_s_dia_mold(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_mold + end interface + + interface + subroutine psb_s_dia_allocate_mnnz(m,n,a,nz) + import :: psb_s_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_dia_allocate_mnnz + end interface + + interface + subroutine psb_s_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_dia_print + end interface + + interface + subroutine psb_s_cp_dia_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_dia_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_to_coo + end interface + + interface + subroutine psb_s_cp_dia_from_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_from_coo + end interface + + interface + subroutine psb_s_cp_dia_to_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_to_fmt + end interface + + interface + subroutine psb_s_cp_dia_from_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_from_fmt + end interface + + interface + subroutine psb_s_mv_dia_to_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_to_coo + end interface + + interface + subroutine psb_s_mv_dia_from_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_from_coo + end interface + + interface + subroutine psb_s_mv_dia_to_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_to_fmt + end interface + + interface + subroutine psb_s_mv_dia_from_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_from_fmt + end interface + + interface + subroutine psb_s_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_csput_a + end interface + + interface + subroutine psb_s_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_dia_csgetptn + end interface + + interface + subroutine psb_s_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_dia_csgetrow + end interface + + interface + subroutine psb_s_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_dia_csgetblk + end interface + + interface + subroutine psb_s_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_cssv + subroutine psb_s_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_cssm + end interface + + interface + subroutine psb_s_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_csmv + subroutine psb_s_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_csmm + end interface + + + interface + function psb_s_dia_maxval(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_maxval + end interface + + interface + function psb_s_dia_csnmi(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_csnmi + end interface + + interface + function psb_s_dia_csnm1(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_csnm1 + end interface + + interface + subroutine psb_s_dia_rowsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_rowsum + end interface + + interface + subroutine psb_s_dia_arwsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_arwsum + end interface + + interface + subroutine psb_s_dia_colsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_colsum + end interface + + interface + subroutine psb_s_dia_aclsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_aclsum + end interface + + interface + subroutine psb_s_dia_get_diag(a,d,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_get_diag + end interface + + interface + subroutine psb_s_dia_scal(d,a,info,side) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_dia_scal + end interface + + interface + subroutine psb_s_dia_scals(d,a,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_s_convert_dia_from_coo(a,tmp,info) + import :: psb_s_dia_sparse_mat, psb_ipk_, psb_s_coo_sparse_mat + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_s_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_dia_sizeof(a) result(res) + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function s_dia_sizeof + + function s_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function s_dia_get_fmt + + function s_dia_get_nzeros(a) result(res) + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function s_dia_get_nzeros + + ! function s_dia_get_size(a) result(res) + ! implicit none + ! class(psb_s_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function s_dia_get_size + + + ! function s_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_s_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function s_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine s_dia_free(a) + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_dia_free + + +end module psb_s_dia_mat_mod diff --git a/ext/psb_s_dns_mat_mod.f90 b/ext/psb_s_dns_mat_mod.f90 new file mode 100644 index 00000000..e9ea5f26 --- /dev/null +++ b/ext/psb_s_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_s_dns_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_spk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + real(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => s_dns_get_size + procedure, pass(a) :: get_nzeros => s_dns_get_nzeros + procedure, nopass :: get_fmt => s_dns_get_fmt + procedure, pass(a) :: sizeof => s_dns_sizeof + procedure, pass(a) :: csmv => psb_s_dns_csmv + procedure, pass(a) :: csmm => psb_s_dns_csmm + procedure, pass(a) :: csnmi => psb_s_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_s_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_s_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_s_dns_get_diag + procedure, pass(a) :: csgetrow => psb_s_dns_csgetrow + procedure, pass(a) :: get_nz_row => s_dns_get_nz_row + procedure, pass(a) :: trim => psb_s_dns_trim + procedure, pass(a) :: free => s_dns_free + procedure, pass(a) :: mold => psb_s_dns_mold + + end type psb_s_dns_sparse_mat + + private :: s_dns_get_nzeros, s_dns_free, s_dns_get_fmt, & + & s_dns_get_size, s_dns_sizeof, s_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_s_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_s_dns_reallocate_nz(nz,a) + import :: psb_s_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dns_sparse_mat), intent(inout) :: a + end subroutine psb_s_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_s_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_s_dns_trim(a) + import :: psb_s_dns_sparse_mat + class(psb_s_dns_sparse_mat), intent(inout) :: a + end subroutine psb_s_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_s_dns_sparse_mat + !! \brief Allocate a class(psb_s_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_dns_mold(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_s_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_s_dns_allocate_mnnz(m,n,a,nz) + import :: psb_s_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Copy and convert to psb_s_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_cp_dns_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_dns_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Copy and convert from psb_s_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_s_cp_dns_from_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Convert to psb_s_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_mv_dns_to_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Convert from psb_s_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_s_mv_dns_from_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_s_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_s_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_s_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_s_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_s_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_s_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_s_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_s_dns_csnmi(a) result(res) + import :: psb_s_dns_sparse_mat, psb_spk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_s_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_s_dns_get_diag(a,d,info) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_s_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function s_dns_sizeof(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function s_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_s_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function s_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function s_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_s_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function s_dns_get_nzeros(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function s_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_s_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function s_dns_get_size(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function s_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_s_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function s_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function s_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_s_dns_sparse_mat + !! Name says all + + subroutine s_dns_free(a) + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_dns_free + + +end module psb_s_dns_mat_mod diff --git a/ext/psb_s_ell_mat_mod.f90 b/ext/psb_s_ell_mat_mod.f90 new file mode 100644 index 00000000..5f09913a --- /dev/null +++ b/ext/psb_s_ell_mat_mod.f90 @@ -0,0 +1,544 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_ell_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => s_ell_is_by_rows + procedure, pass(a) :: get_size => s_ell_get_size + procedure, pass(a) :: get_nzeros => s_ell_get_nzeros + procedure, nopass :: get_fmt => s_ell_get_fmt + procedure, pass(a) :: sizeof => s_ell_sizeof + procedure, pass(a) :: csmm => psb_s_ell_csmm + procedure, pass(a) :: csmv => psb_s_ell_csmv + procedure, pass(a) :: inner_cssm => psb_s_ell_cssm + procedure, pass(a) :: inner_cssv => psb_s_ell_cssv + procedure, pass(a) :: scals => psb_s_ell_scals + procedure, pass(a) :: scalv => psb_s_ell_scal + procedure, pass(a) :: maxval => psb_s_ell_maxval + procedure, pass(a) :: csnmi => psb_s_ell_csnmi + procedure, pass(a) :: csnm1 => psb_s_ell_csnm1 + procedure, pass(a) :: rowsum => psb_s_ell_rowsum + procedure, pass(a) :: arwsum => psb_s_ell_arwsum + procedure, pass(a) :: colsum => psb_s_ell_colsum + procedure, pass(a) :: aclsum => psb_s_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_s_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_s_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_s_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_s_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_s_ell_csput_a + procedure, pass(a) :: get_diag => psb_s_ell_get_diag + procedure, pass(a) :: csgetptn => psb_s_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_s_ell_csgetrow + procedure, pass(a) :: get_nz_row => s_ell_get_nz_row + procedure, pass(a) :: reinit => psb_s_ell_reinit + procedure, pass(a) :: trim => psb_s_ell_trim + procedure, pass(a) :: print => psb_s_ell_print + procedure, pass(a) :: free => s_ell_free + procedure, pass(a) :: mold => psb_s_ell_mold + + end type psb_s_ell_sparse_mat + + private :: s_ell_get_nzeros, s_ell_free, s_ell_get_fmt, & + & s_ell_get_size, s_ell_sizeof, s_ell_get_nz_row, & + & s_ell_is_by_rows + + interface + subroutine psb_s_ell_reallocate_nz(nz,a) + import :: psb_s_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_ell_sparse_mat), intent(inout) :: a + end subroutine psb_s_ell_reallocate_nz + end interface + + interface + subroutine psb_s_ell_reinit(a,clear) + import :: psb_s_ell_sparse_mat + class(psb_s_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_ell_reinit + end interface + + interface + subroutine psb_s_ell_trim(a) + import :: psb_s_ell_sparse_mat + class(psb_s_ell_sparse_mat), intent(inout) :: a + end subroutine psb_s_ell_trim + end interface + + interface + subroutine psb_s_ell_mold(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_mold + end interface + + interface + subroutine psb_s_ell_allocate_mnnz(m,n,a,nz) + import :: psb_s_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_ell_allocate_mnnz + end interface + + interface + subroutine psb_s_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_ell_print + end interface + + interface + subroutine psb_s_cp_ell_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_ell_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_to_coo + end interface + + interface + subroutine psb_s_cp_ell_from_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_from_coo + end interface + + interface + subroutine psb_s_cp_ell_to_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_to_fmt + end interface + + interface + subroutine psb_s_cp_ell_from_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_from_fmt + end interface + + interface + subroutine psb_s_mv_ell_to_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_to_coo + end interface + + interface + subroutine psb_s_mv_ell_from_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_from_coo + end interface + + interface + subroutine psb_s_mv_ell_to_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_to_fmt + end interface + + interface + subroutine psb_s_mv_ell_from_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_from_fmt + end interface + + interface + subroutine psb_s_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_csput_a + end interface + + interface + subroutine psb_s_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_ell_csgetptn + end interface + + interface + subroutine psb_s_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_ell_csgetrow + end interface + + interface + subroutine psb_s_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_ell_csgetblk + end interface + + interface + subroutine psb_s_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_cssv + subroutine psb_s_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_cssm + end interface + + interface + subroutine psb_s_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_csmv + subroutine psb_s_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_csmm + end interface + + + interface + function psb_s_ell_maxval(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_maxval + end interface + + interface + function psb_s_ell_csnmi(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_csnmi + end interface + + interface + function psb_s_ell_csnm1(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_csnm1 + end interface + + interface + subroutine psb_s_ell_rowsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_rowsum + end interface + + interface + subroutine psb_s_ell_arwsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_arwsum + end interface + + interface + subroutine psb_s_ell_colsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_colsum + end interface + + interface + subroutine psb_s_ell_aclsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_aclsum + end interface + + interface + subroutine psb_s_ell_get_diag(a,d,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_get_diag + end interface + + interface + subroutine psb_s_ell_scal(d,a,info,side) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_ell_scal + end interface + + interface + subroutine psb_s_ell_scals(d,a,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_scals + end interface + + interface + subroutine psi_s_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_s_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_ell_is_by_rows(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function s_ell_is_by_rows + + function s_ell_sizeof(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function s_ell_sizeof + + function s_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function s_ell_get_fmt + + function s_ell_get_nzeros(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function s_ell_get_nzeros + + function s_ell_get_size(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function s_ell_get_size + + + function s_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function s_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_ell_free(a) + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_ell_free + + +end module psb_s_ell_mat_mod diff --git a/ext/psb_s_hdia_mat_mod.f90 b/ext/psb_s_hdia_mat_mod.f90 new file mode 100644 index 00000000..b7b2b110 --- /dev/null +++ b/ext/psb_s_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_s_hdia_mat_mod + + use psb_s_base_mat_mod + + + type, extends(psb_s_base_sparse_mat) :: psb_s_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + real(psb_spk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => s_hdia_get_size + procedure, pass(a) :: get_nzeros => s_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => s_hdia_set_nzeros + procedure, nopass :: get_fmt => s_hdia_get_fmt + procedure, pass(a) :: sizeof => s_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_s_hdia_csmm + procedure, pass(a) :: csmv => psb_s_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_s_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_s_hdia_cssv + ! procedure, pass(a) :: scals => psb_s_hdia_scals + ! procedure, pass(a) :: scalv => psb_s_hdia_scal + ! procedure, pass(a) :: maxval => psb_s_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_s_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_s_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_s_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_s_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_s_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_s_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_s_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_s_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_s_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_s_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_s_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_s_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_s_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_s_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => s_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_s_hdia_reinit + ! procedure, pass(a) :: trim => psb_s_hdia_trim + procedure, pass(a) :: print => psb_s_hdia_print + procedure, pass(a) :: free => s_hdia_free + procedure, pass(a) :: mold => psb_s_hdia_mold + + end type psb_s_hdia_sparse_mat + + private :: s_hdia_get_nzeros, s_hdia_set_nzeros, s_hdia_free, & + & s_hdia_get_fmt, s_hdia_sizeof +!!$ & +!!$ & s_hdia_get_nz_row s_hdia_get_size, + +!!$ interface +!!$ subroutine psb_s_hdia_reallocate_nz(nz,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_reinit(a,clear) +!!$ import :: psb_s_hdia_sparse_mat +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_s_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_trim(a) +!!$ import :: psb_s_hdia_sparse_mat +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_hdia_trim +!!$ end interface + + interface + subroutine psb_s_hdia_mold(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hdia_mold + end interface + + interface + subroutine psb_s_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_s_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hdia_allocate_mnnz + end interface + + interface + subroutine psb_s_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_hdia_print + end interface + + interface + subroutine psb_s_cp_hdia_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_hdia_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hdia_to_coo + end interface + + interface + subroutine psb_s_cp_hdia_from_coo(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_s_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_s_mv_hdia_to_coo(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hdia_to_coo + end interface + + interface + subroutine psb_s_mv_hdia_from_coo(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_s_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_s_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ real(psb_spk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_s_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_s_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_s_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdia_cssv +!!$ subroutine psb_s_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdia_cssm +!!$ end interface + + interface + subroutine psb_s_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hdia_csmv +!!$ subroutine psb_s_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_s_hdia_maxval(a) result(res) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_s_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_s_hdia_csnmi(a) result(res) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_s_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_s_hdia_csnm1(a) result(res) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_s_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_rowsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_arwsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_colsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_aclsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_get_diag(a,d,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_scal(d,a,info,side) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_s_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_s_hdia_scals(d,a,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * psb_sizeof_sp + + end function s_hdia_sizeof + + function s_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function s_hdia_get_fmt + + function s_hdia_get_nzeros(a) result(res) + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function s_hdia_get_nzeros + + subroutine s_hdia_set_nzeros(a,nz) + implicit none + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine s_hdia_set_nzeros + + ! function s_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_s_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function s_hdia_get_size + + + ! function s_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_s_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function s_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine s_hdia_free(a) + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_hdia_free + + +end module psb_s_hdia_mat_mod diff --git a/ext/psb_s_hll_mat_mod.f90 b/ext/psb_s_hll_mat_mod.f90 new file mode 100644 index 00000000..735091c8 --- /dev/null +++ b/ext/psb_s_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_hll_mat_mod + + use psb_s_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_spk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => s_hll_get_hksz + procedure, pass(a) :: set_hksz => s_hll_set_hksz + procedure, pass(a) :: get_size => s_hll_get_size + procedure, pass(a) :: set_nzeros => s_hll_set_nzeros + procedure, pass(a) :: get_nzeros => s_hll_get_nzeros + procedure, nopass :: get_fmt => s_hll_get_fmt + procedure, pass(a) :: sizeof => s_hll_sizeof + procedure, pass(a) :: csmm => psb_s_hll_csmm + procedure, pass(a) :: csmv => psb_s_hll_csmv + procedure, pass(a) :: inner_cssm => psb_s_hll_cssm + procedure, pass(a) :: inner_cssv => psb_s_hll_cssv + procedure, pass(a) :: scals => psb_s_hll_scals + procedure, pass(a) :: scalv => psb_s_hll_scal + procedure, pass(a) :: maxval => psb_s_hll_maxval + procedure, pass(a) :: csnmi => psb_s_hll_csnmi + procedure, pass(a) :: csnm1 => psb_s_hll_csnm1 + procedure, pass(a) :: rowsum => psb_s_hll_rowsum + procedure, pass(a) :: arwsum => psb_s_hll_arwsum + procedure, pass(a) :: colsum => psb_s_hll_colsum + procedure, pass(a) :: aclsum => psb_s_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_s_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_s_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_s_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_s_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_s_hll_csput_a + procedure, pass(a) :: get_diag => psb_s_hll_get_diag + procedure, pass(a) :: csgetptn => psb_s_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_s_hll_csgetrow + procedure, pass(a) :: get_nz_row => s_hll_get_nz_row + procedure, pass(a) :: reinit => psb_s_hll_reinit + procedure, pass(a) :: print => psb_s_hll_print + procedure, pass(a) :: free => s_hll_free + procedure, pass(a) :: mold => psb_s_hll_mold + + end type psb_s_hll_sparse_mat + + private :: s_hll_get_nzeros, s_hll_free, s_hll_get_fmt, & + & s_hll_get_size, s_hll_sizeof, s_hll_get_nz_row, & + & s_hll_set_nzeros, s_hll_get_hksz, s_hll_set_hksz + + interface + subroutine psb_s_hll_reallocate_nz(nz,a) + import :: psb_s_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hll_sparse_mat), intent(inout) :: a + end subroutine psb_s_hll_reallocate_nz + end interface + + interface + subroutine psb_s_hll_reinit(a,clear) + import :: psb_s_hll_sparse_mat + class(psb_s_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_hll_reinit + end interface + + interface + subroutine psb_s_hll_mold(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_mold + end interface + + interface + subroutine psb_s_hll_allocate_mnnz(m,n,a,nz) + import :: psb_s_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hll_allocate_mnnz + end interface + + interface + subroutine psb_s_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_hll_print + end interface + + interface + subroutine psb_s_cp_hll_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_hll_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_to_coo + end interface + + interface + subroutine psb_s_cp_hll_from_coo(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_from_coo + end interface + + interface + subroutine psb_s_cp_hll_to_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_to_fmt + end interface + + interface + subroutine psb_s_cp_hll_from_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_from_fmt + end interface + + interface + subroutine psb_s_mv_hll_to_coo(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_to_coo + end interface + + interface + subroutine psb_s_mv_hll_from_coo(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_from_coo + end interface + + interface + subroutine psb_s_mv_hll_to_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_to_fmt + end interface + + interface + subroutine psb_s_mv_hll_from_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_from_fmt + end interface + + interface + subroutine psb_s_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_csput_a + end interface + + interface + subroutine psb_s_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_hll_csgetptn + end interface + + interface + subroutine psb_s_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_hll_csgetrow + end interface + + interface + subroutine psb_s_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_hll_csgetblk + end interface + + interface + subroutine psb_s_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_cssv + subroutine psb_s_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_cssm + end interface + + interface + subroutine psb_s_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_csmv + subroutine psb_s_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_csmm + end interface + + + interface + function psb_s_hll_maxval(a) result(res) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_hll_maxval + end interface + + interface + function psb_s_hll_csnmi(a) result(res) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_hll_csnmi + end interface + + interface + function psb_s_hll_csnm1(a) result(res) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_hll_csnm1 + end interface + + interface + subroutine psb_s_hll_rowsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_rowsum + end interface + + interface + subroutine psb_s_hll_arwsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_arwsum + end interface + + interface + subroutine psb_s_hll_colsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_colsum + end interface + + interface + subroutine psb_s_hll_aclsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_aclsum + end interface + + interface + subroutine psb_s_hll_get_diag(a,d,info) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_get_diag + end interface + + interface + subroutine psb_s_hll_scal(d,a,info,side) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_hll_scal + end interface + + interface + subroutine psb_s_hll_scals(d,a,info) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_s_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_s_hll_sparse_mat, psb_ipk_, psb_s_coo_sparse_mat + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_s_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hll_sizeof(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function s_hll_sizeof + + function s_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function s_hll_get_fmt + + subroutine s_hll_set_nzeros(a,n) + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine s_hll_set_nzeros + + function s_hll_get_nzeros(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function s_hll_get_nzeros + + function s_hll_get_size(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function s_hll_get_size + + + + function s_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function s_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_hll_free(a) + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine s_hll_free + + subroutine s_hll_set_hksz(a,n) + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine s_hll_set_hksz + + function s_hll_get_hksz(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function s_hll_get_hksz + +end module psb_s_hll_mat_mod diff --git a/ext/psb_z_dia_mat_mod.f90 b/ext/psb_z_dia_mat_mod.f90 new file mode 100644 index 00000000..76d071af --- /dev/null +++ b/ext/psb_z_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_dia_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + complex(psb_dpk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => z_dia_get_size + procedure, pass(a) :: get_nzeros => z_dia_get_nzeros + procedure, nopass :: get_fmt => z_dia_get_fmt + procedure, pass(a) :: sizeof => z_dia_sizeof + procedure, pass(a) :: csmm => psb_z_dia_csmm + procedure, pass(a) :: csmv => psb_z_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_z_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_z_dia_cssv + procedure, pass(a) :: scals => psb_z_dia_scals + procedure, pass(a) :: scalv => psb_z_dia_scal + procedure, pass(a) :: maxval => psb_z_dia_maxval + procedure, pass(a) :: rowsum => psb_z_dia_rowsum + procedure, pass(a) :: arwsum => psb_z_dia_arwsum + procedure, pass(a) :: colsum => psb_z_dia_colsum + procedure, pass(a) :: aclsum => psb_z_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_z_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_z_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_z_dia_csput_a + procedure, pass(a) :: get_diag => psb_z_dia_get_diag + procedure, pass(a) :: csgetptn => psb_z_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_z_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => z_dia_get_nz_row + procedure, pass(a) :: reinit => psb_z_dia_reinit + ! procedure, pass(a) :: trim => psb_z_dia_trim + procedure, pass(a) :: print => psb_z_dia_print + procedure, pass(a) :: free => z_dia_free + procedure, pass(a) :: mold => psb_z_dia_mold + + end type psb_z_dia_sparse_mat + + private :: z_dia_get_nzeros, z_dia_free, z_dia_get_fmt, & + & z_dia_sizeof !, z_dia_get_size, z_dia_get_nz_row + + interface + subroutine psb_z_dia_reallocate_nz(nz,a) + import :: psb_z_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dia_sparse_mat), intent(inout) :: a + end subroutine psb_z_dia_reallocate_nz + end interface + + interface + subroutine psb_z_dia_reinit(a,clear) + import :: psb_z_dia_sparse_mat + class(psb_z_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_dia_reinit + end interface + + interface + subroutine psb_z_dia_trim(a) + import :: psb_z_dia_sparse_mat + class(psb_z_dia_sparse_mat), intent(inout) :: a + end subroutine psb_z_dia_trim + end interface + + interface + subroutine psb_z_dia_mold(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_mold + end interface + + interface + subroutine psb_z_dia_allocate_mnnz(m,n,a,nz) + import :: psb_z_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_dia_allocate_mnnz + end interface + + interface + subroutine psb_z_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_dia_print + end interface + + interface + subroutine psb_z_cp_dia_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_dia_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_to_coo + end interface + + interface + subroutine psb_z_cp_dia_from_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_from_coo + end interface + + interface + subroutine psb_z_cp_dia_to_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_to_fmt + end interface + + interface + subroutine psb_z_cp_dia_from_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_from_fmt + end interface + + interface + subroutine psb_z_mv_dia_to_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_to_coo + end interface + + interface + subroutine psb_z_mv_dia_from_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_from_coo + end interface + + interface + subroutine psb_z_mv_dia_to_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_to_fmt + end interface + + interface + subroutine psb_z_mv_dia_from_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_from_fmt + end interface + + interface + subroutine psb_z_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_csput_a + end interface + + interface + subroutine psb_z_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_dia_csgetptn + end interface + + interface + subroutine psb_z_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_dia_csgetrow + end interface + + interface + subroutine psb_z_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_dia_csgetblk + end interface + + interface + subroutine psb_z_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_cssv + subroutine psb_z_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_cssm + end interface + + interface + subroutine psb_z_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_csmv + subroutine psb_z_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_csmm + end interface + + + interface + function psb_z_dia_maxval(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_maxval + end interface + + interface + function psb_z_dia_csnmi(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_csnmi + end interface + + interface + function psb_z_dia_csnm1(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_csnm1 + end interface + + interface + subroutine psb_z_dia_rowsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_rowsum + end interface + + interface + subroutine psb_z_dia_arwsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_arwsum + end interface + + interface + subroutine psb_z_dia_colsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_colsum + end interface + + interface + subroutine psb_z_dia_aclsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_aclsum + end interface + + interface + subroutine psb_z_dia_get_diag(a,d,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_get_diag + end interface + + interface + subroutine psb_z_dia_scal(d,a,info,side) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_dia_scal + end interface + + interface + subroutine psb_z_dia_scals(d,a,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_z_convert_dia_from_coo(a,tmp,info) + import :: psb_z_dia_sparse_mat, psb_ipk_, psb_z_coo_sparse_mat + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_z_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_dia_sizeof(a) result(res) + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function z_dia_sizeof + + function z_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function z_dia_get_fmt + + function z_dia_get_nzeros(a) result(res) + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function z_dia_get_nzeros + + ! function z_dia_get_size(a) result(res) + ! implicit none + ! class(psb_z_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function z_dia_get_size + + + ! function z_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_z_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function z_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine z_dia_free(a) + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_dia_free + + +end module psb_z_dia_mat_mod diff --git a/ext/psb_z_dns_mat_mod.f90 b/ext/psb_z_dns_mat_mod.f90 new file mode 100644 index 00000000..6147057d --- /dev/null +++ b/ext/psb_z_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_z_dns_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_dpk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + complex(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => z_dns_get_size + procedure, pass(a) :: get_nzeros => z_dns_get_nzeros + procedure, nopass :: get_fmt => z_dns_get_fmt + procedure, pass(a) :: sizeof => z_dns_sizeof + procedure, pass(a) :: csmv => psb_z_dns_csmv + procedure, pass(a) :: csmm => psb_z_dns_csmm + procedure, pass(a) :: csnmi => psb_z_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_z_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_z_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_z_dns_get_diag + procedure, pass(a) :: csgetrow => psb_z_dns_csgetrow + procedure, pass(a) :: get_nz_row => z_dns_get_nz_row + procedure, pass(a) :: trim => psb_z_dns_trim + procedure, pass(a) :: free => z_dns_free + procedure, pass(a) :: mold => psb_z_dns_mold + + end type psb_z_dns_sparse_mat + + private :: z_dns_get_nzeros, z_dns_free, z_dns_get_fmt, & + & z_dns_get_size, z_dns_sizeof, z_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_z_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_z_dns_reallocate_nz(nz,a) + import :: psb_z_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dns_sparse_mat), intent(inout) :: a + end subroutine psb_z_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_z_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_z_dns_trim(a) + import :: psb_z_dns_sparse_mat + class(psb_z_dns_sparse_mat), intent(inout) :: a + end subroutine psb_z_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_z_dns_sparse_mat + !! \brief Allocate a class(psb_z_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_z_dns_mold(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_z_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_z_dns_allocate_mnnz(m,n,a,nz) + import :: psb_z_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Copy and convert to psb_z_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_z_cp_dns_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_dns_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Copy and convert from psb_z_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_z_cp_dns_from_coo(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Convert to psb_z_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_z_mv_dns_to_coo(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Convert from psb_z_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_z_mv_dns_from_coo(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_z_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_z_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_z_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_z_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_z_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_z_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_z_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_z_dns_csnmi(a) result(res) + import :: psb_z_dns_sparse_mat, psb_dpk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_z_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_z_dns_get_diag(a,d,info) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_z_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function z_dns_sizeof(a) result(res) + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function z_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_z_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function z_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function z_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_z_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function z_dns_get_nzeros(a) result(res) + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function z_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_z_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function z_dns_get_size(a) result(res) + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function z_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_z_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function z_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function z_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_z_dns_sparse_mat + !! Name says all + + subroutine z_dns_free(a) + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_dns_free + + +end module psb_z_dns_mat_mod diff --git a/ext/psb_z_ell_mat_mod.f90 b/ext/psb_z_ell_mat_mod.f90 new file mode 100644 index 00000000..52dc62b1 --- /dev/null +++ b/ext/psb_z_ell_mat_mod.f90 @@ -0,0 +1,544 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_ell_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + complex(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => z_ell_is_by_rows + procedure, pass(a) :: get_size => z_ell_get_size + procedure, pass(a) :: get_nzeros => z_ell_get_nzeros + procedure, nopass :: get_fmt => z_ell_get_fmt + procedure, pass(a) :: sizeof => z_ell_sizeof + procedure, pass(a) :: csmm => psb_z_ell_csmm + procedure, pass(a) :: csmv => psb_z_ell_csmv + procedure, pass(a) :: inner_cssm => psb_z_ell_cssm + procedure, pass(a) :: inner_cssv => psb_z_ell_cssv + procedure, pass(a) :: scals => psb_z_ell_scals + procedure, pass(a) :: scalv => psb_z_ell_scal + procedure, pass(a) :: maxval => psb_z_ell_maxval + procedure, pass(a) :: csnmi => psb_z_ell_csnmi + procedure, pass(a) :: csnm1 => psb_z_ell_csnm1 + procedure, pass(a) :: rowsum => psb_z_ell_rowsum + procedure, pass(a) :: arwsum => psb_z_ell_arwsum + procedure, pass(a) :: colsum => psb_z_ell_colsum + procedure, pass(a) :: aclsum => psb_z_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_z_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_z_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_z_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_z_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_z_ell_csput_a + procedure, pass(a) :: get_diag => psb_z_ell_get_diag + procedure, pass(a) :: csgetptn => psb_z_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_z_ell_csgetrow + procedure, pass(a) :: get_nz_row => z_ell_get_nz_row + procedure, pass(a) :: reinit => psb_z_ell_reinit + procedure, pass(a) :: trim => psb_z_ell_trim + procedure, pass(a) :: print => psb_z_ell_print + procedure, pass(a) :: free => z_ell_free + procedure, pass(a) :: mold => psb_z_ell_mold + + end type psb_z_ell_sparse_mat + + private :: z_ell_get_nzeros, z_ell_free, z_ell_get_fmt, & + & z_ell_get_size, z_ell_sizeof, z_ell_get_nz_row, & + & z_ell_is_by_rows + + interface + subroutine psb_z_ell_reallocate_nz(nz,a) + import :: psb_z_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_ell_sparse_mat), intent(inout) :: a + end subroutine psb_z_ell_reallocate_nz + end interface + + interface + subroutine psb_z_ell_reinit(a,clear) + import :: psb_z_ell_sparse_mat + class(psb_z_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_ell_reinit + end interface + + interface + subroutine psb_z_ell_trim(a) + import :: psb_z_ell_sparse_mat + class(psb_z_ell_sparse_mat), intent(inout) :: a + end subroutine psb_z_ell_trim + end interface + + interface + subroutine psb_z_ell_mold(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_mold + end interface + + interface + subroutine psb_z_ell_allocate_mnnz(m,n,a,nz) + import :: psb_z_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_ell_allocate_mnnz + end interface + + interface + subroutine psb_z_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_ell_print + end interface + + interface + subroutine psb_z_cp_ell_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_ell_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_to_coo + end interface + + interface + subroutine psb_z_cp_ell_from_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_from_coo + end interface + + interface + subroutine psb_z_cp_ell_to_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_to_fmt + end interface + + interface + subroutine psb_z_cp_ell_from_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_from_fmt + end interface + + interface + subroutine psb_z_mv_ell_to_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_to_coo + end interface + + interface + subroutine psb_z_mv_ell_from_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_from_coo + end interface + + interface + subroutine psb_z_mv_ell_to_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_to_fmt + end interface + + interface + subroutine psb_z_mv_ell_from_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_from_fmt + end interface + + interface + subroutine psb_z_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_csput_a + end interface + + interface + subroutine psb_z_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_ell_csgetptn + end interface + + interface + subroutine psb_z_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_ell_csgetrow + end interface + + interface + subroutine psb_z_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_ell_csgetblk + end interface + + interface + subroutine psb_z_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_cssv + subroutine psb_z_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_cssm + end interface + + interface + subroutine psb_z_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_csmv + subroutine psb_z_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_csmm + end interface + + + interface + function psb_z_ell_maxval(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_maxval + end interface + + interface + function psb_z_ell_csnmi(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_csnmi + end interface + + interface + function psb_z_ell_csnm1(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_csnm1 + end interface + + interface + subroutine psb_z_ell_rowsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_rowsum + end interface + + interface + subroutine psb_z_ell_arwsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_arwsum + end interface + + interface + subroutine psb_z_ell_colsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_colsum + end interface + + interface + subroutine psb_z_ell_aclsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_aclsum + end interface + + interface + subroutine psb_z_ell_get_diag(a,d,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_get_diag + end interface + + interface + subroutine psb_z_ell_scal(d,a,info,side) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_ell_scal + end interface + + interface + subroutine psb_z_ell_scals(d,a,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_scals + end interface + + interface + subroutine psi_z_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_z_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_ell_is_by_rows(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function z_ell_is_by_rows + + function z_ell_sizeof(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function z_ell_sizeof + + function z_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function z_ell_get_fmt + + function z_ell_get_nzeros(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function z_ell_get_nzeros + + function z_ell_get_size(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function z_ell_get_size + + + function z_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function z_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_ell_free(a) + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_ell_free + + +end module psb_z_ell_mat_mod diff --git a/ext/psb_z_hdia_mat_mod.f90 b/ext/psb_z_hdia_mat_mod.f90 new file mode 100644 index 00000000..e7c11321 --- /dev/null +++ b/ext/psb_z_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_z_hdia_mat_mod + + use psb_z_base_mat_mod + + + type, extends(psb_z_base_sparse_mat) :: psb_z_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + complex(psb_dpk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => z_hdia_get_size + procedure, pass(a) :: get_nzeros => z_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => z_hdia_set_nzeros + procedure, nopass :: get_fmt => z_hdia_get_fmt + procedure, pass(a) :: sizeof => z_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_z_hdia_csmm + procedure, pass(a) :: csmv => psb_z_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_z_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_z_hdia_cssv + ! procedure, pass(a) :: scals => psb_z_hdia_scals + ! procedure, pass(a) :: scalv => psb_z_hdia_scal + ! procedure, pass(a) :: maxval => psb_z_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_z_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_z_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_z_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_z_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_z_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_z_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_z_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_z_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_z_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_z_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_z_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_z_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_z_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_z_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => z_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_z_hdia_reinit + ! procedure, pass(a) :: trim => psb_z_hdia_trim + procedure, pass(a) :: print => psb_z_hdia_print + procedure, pass(a) :: free => z_hdia_free + procedure, pass(a) :: mold => psb_z_hdia_mold + + end type psb_z_hdia_sparse_mat + + private :: z_hdia_get_nzeros, z_hdia_set_nzeros, z_hdia_free, & + & z_hdia_get_fmt, z_hdia_sizeof +!!$ & +!!$ & z_hdia_get_nz_row z_hdia_get_size, + +!!$ interface +!!$ subroutine psb_z_hdia_reallocate_nz(nz,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_reinit(a,clear) +!!$ import :: psb_z_hdia_sparse_mat +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_z_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_trim(a) +!!$ import :: psb_z_hdia_sparse_mat +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdia_trim +!!$ end interface + + interface + subroutine psb_z_hdia_mold(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdia_mold + end interface + + interface + subroutine psb_z_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_z_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hdia_allocate_mnnz + end interface + + interface + subroutine psb_z_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_hdia_print + end interface + + interface + subroutine psb_z_cp_hdia_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_hdia_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdia_to_coo + end interface + + interface + subroutine psb_z_cp_hdia_from_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_z_mv_hdia_to_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdia_to_coo + end interface + + interface + subroutine psb_z_mv_hdia_from_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ complex(psb_dpk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_z_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_cssv +!!$ subroutine psb_z_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_cssm +!!$ end interface + + interface + subroutine psb_z_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hdia_csmv +!!$ subroutine psb_z_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_z_hdia_maxval(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_z_hdia_csnmi(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_z_hdia_csnm1(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_rowsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_arwsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_colsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_aclsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_get_diag(a,d,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_scal(d,a,info,side) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_hdia_scals(d,a,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * (2*psb_sizeof_dp) + + end function z_hdia_sizeof + + function z_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function z_hdia_get_fmt + + function z_hdia_get_nzeros(a) result(res) + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function z_hdia_get_nzeros + + subroutine z_hdia_set_nzeros(a,nz) + implicit none + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine z_hdia_set_nzeros + + ! function z_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_z_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function z_hdia_get_size + + + ! function z_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_z_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function z_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine z_hdia_free(a) + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_hdia_free + + +end module psb_z_hdia_mat_mod diff --git a/ext/psb_z_hll_mat_mod.f90 b/ext/psb_z_hll_mat_mod.f90 new file mode 100644 index 00000000..98eb403f --- /dev/null +++ b/ext/psb_z_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_hll_mat_mod + + use psb_z_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + complex(psb_dpk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => z_hll_get_hksz + procedure, pass(a) :: set_hksz => z_hll_set_hksz + procedure, pass(a) :: get_size => z_hll_get_size + procedure, pass(a) :: set_nzeros => z_hll_set_nzeros + procedure, pass(a) :: get_nzeros => z_hll_get_nzeros + procedure, nopass :: get_fmt => z_hll_get_fmt + procedure, pass(a) :: sizeof => z_hll_sizeof + procedure, pass(a) :: csmm => psb_z_hll_csmm + procedure, pass(a) :: csmv => psb_z_hll_csmv + procedure, pass(a) :: inner_cssm => psb_z_hll_cssm + procedure, pass(a) :: inner_cssv => psb_z_hll_cssv + procedure, pass(a) :: scals => psb_z_hll_scals + procedure, pass(a) :: scalv => psb_z_hll_scal + procedure, pass(a) :: maxval => psb_z_hll_maxval + procedure, pass(a) :: csnmi => psb_z_hll_csnmi + procedure, pass(a) :: csnm1 => psb_z_hll_csnm1 + procedure, pass(a) :: rowsum => psb_z_hll_rowsum + procedure, pass(a) :: arwsum => psb_z_hll_arwsum + procedure, pass(a) :: colsum => psb_z_hll_colsum + procedure, pass(a) :: aclsum => psb_z_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_z_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_z_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_z_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_z_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_z_hll_csput_a + procedure, pass(a) :: get_diag => psb_z_hll_get_diag + procedure, pass(a) :: csgetptn => psb_z_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_z_hll_csgetrow + procedure, pass(a) :: get_nz_row => z_hll_get_nz_row + procedure, pass(a) :: reinit => psb_z_hll_reinit + procedure, pass(a) :: print => psb_z_hll_print + procedure, pass(a) :: free => z_hll_free + procedure, pass(a) :: mold => psb_z_hll_mold + + end type psb_z_hll_sparse_mat + + private :: z_hll_get_nzeros, z_hll_free, z_hll_get_fmt, & + & z_hll_get_size, z_hll_sizeof, z_hll_get_nz_row, & + & z_hll_set_nzeros, z_hll_get_hksz, z_hll_set_hksz + + interface + subroutine psb_z_hll_reallocate_nz(nz,a) + import :: psb_z_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hll_sparse_mat), intent(inout) :: a + end subroutine psb_z_hll_reallocate_nz + end interface + + interface + subroutine psb_z_hll_reinit(a,clear) + import :: psb_z_hll_sparse_mat + class(psb_z_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_hll_reinit + end interface + + interface + subroutine psb_z_hll_mold(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_mold + end interface + + interface + subroutine psb_z_hll_allocate_mnnz(m,n,a,nz) + import :: psb_z_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hll_allocate_mnnz + end interface + + interface + subroutine psb_z_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_hll_print + end interface + + interface + subroutine psb_z_cp_hll_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_hll_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_to_coo + end interface + + interface + subroutine psb_z_cp_hll_from_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_from_coo + end interface + + interface + subroutine psb_z_cp_hll_to_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_to_fmt + end interface + + interface + subroutine psb_z_cp_hll_from_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_from_fmt + end interface + + interface + subroutine psb_z_mv_hll_to_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_to_coo + end interface + + interface + subroutine psb_z_mv_hll_from_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_from_coo + end interface + + interface + subroutine psb_z_mv_hll_to_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_to_fmt + end interface + + interface + subroutine psb_z_mv_hll_from_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_from_fmt + end interface + + interface + subroutine psb_z_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_csput_a + end interface + + interface + subroutine psb_z_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_hll_csgetptn + end interface + + interface + subroutine psb_z_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_hll_csgetrow + end interface + + interface + subroutine psb_z_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_hll_csgetblk + end interface + + interface + subroutine psb_z_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_cssv + subroutine psb_z_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_cssm + end interface + + interface + subroutine psb_z_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_csmv + subroutine psb_z_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_csmm + end interface + + + interface + function psb_z_hll_maxval(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_maxval + end interface + + interface + function psb_z_hll_csnmi(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_csnmi + end interface + + interface + function psb_z_hll_csnm1(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_csnm1 + end interface + + interface + subroutine psb_z_hll_rowsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_rowsum + end interface + + interface + subroutine psb_z_hll_arwsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_arwsum + end interface + + interface + subroutine psb_z_hll_colsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_colsum + end interface + + interface + subroutine psb_z_hll_aclsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_aclsum + end interface + + interface + subroutine psb_z_hll_get_diag(a,d,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_get_diag + end interface + + interface + subroutine psb_z_hll_scal(d,a,info,side) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_hll_scal + end interface + + interface + subroutine psb_z_hll_scals(d,a,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_z_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_z_hll_sparse_mat, psb_ipk_, psb_z_coo_sparse_mat + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_z_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hll_sizeof(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function z_hll_sizeof + + function z_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function z_hll_get_fmt + + subroutine z_hll_set_nzeros(a,n) + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine z_hll_set_nzeros + + function z_hll_get_nzeros(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function z_hll_get_nzeros + + function z_hll_get_size(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function z_hll_get_size + + + + function z_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function z_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_hll_free(a) + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine z_hll_free + + subroutine z_hll_set_hksz(a,n) + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine z_hll_set_hksz + + function z_hll_get_hksz(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function z_hll_get_hksz + +end module psb_z_hll_mat_mod diff --git a/ext/psi_c_ext_util_mod.f90 b/ext/psi_c_ext_util_mod.f90 new file mode 100644 index 00000000..e58c0d93 --- /dev/null +++ b/ext/psi_c_ext_util_mod.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_c_ext_util_mod + + use psb_base_mod, only : psb_ipk_, psb_spk_ + + interface psi_xtr_dia_from_coo + subroutine psi_c_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata,rdisp) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd, ncd, ia(:), ja(:), d(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + end subroutine psi_c_xtr_dia_from_coo + end interface + + interface psi_xtr_ell_from_coo + subroutine psi_c_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,& + & valc,ja,val,irn,diag,ld) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + end subroutine psi_c_xtr_ell_from_coo + end interface psi_xtr_ell_from_coo + + interface psi_xtr_coo_from_dia + subroutine psi_c_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,& + & info,rdisp) + import :: psb_ipk_, psb_spk_ + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), nz + complex(psb_spk_), intent(inout) :: val(:) + complex(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + end subroutine psi_c_xtr_coo_from_dia + end interface + +end module psi_c_ext_util_mod diff --git a/ext/psi_d_ext_util_mod.f90 b/ext/psi_d_ext_util_mod.f90 new file mode 100644 index 00000000..07de8ad1 --- /dev/null +++ b/ext/psi_d_ext_util_mod.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_d_ext_util_mod + + use psb_base_mod, only : psb_ipk_, psb_dpk_ + + interface psi_xtr_dia_from_coo + subroutine psi_d_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata,rdisp) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd, ncd, ia(:), ja(:), d(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + end subroutine psi_d_xtr_dia_from_coo + end interface + + interface psi_xtr_ell_from_coo + subroutine psi_d_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,& + & valc,ja,val,irn,diag,ld) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + end subroutine psi_d_xtr_ell_from_coo + end interface psi_xtr_ell_from_coo + + interface psi_xtr_coo_from_dia + subroutine psi_d_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,& + & info,rdisp) + import :: psb_ipk_, psb_dpk_ + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), nz + real(psb_dpk_), intent(inout) :: val(:) + real(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + end subroutine psi_d_xtr_coo_from_dia + end interface + +end module psi_d_ext_util_mod diff --git a/ext/psi_ext_util_mod.f90 b/ext/psi_ext_util_mod.f90 new file mode 100644 index 00000000..afb2c749 --- /dev/null +++ b/ext/psi_ext_util_mod.f90 @@ -0,0 +1,41 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_ext_util_mod + + use psi_i_ext_util_mod + use psi_s_ext_util_mod + use psi_c_ext_util_mod + use psi_d_ext_util_mod + use psi_z_ext_util_mod + +end module psi_ext_util_mod diff --git a/ext/psi_i_ext_util_mod.f90 b/ext/psi_i_ext_util_mod.f90 new file mode 100644 index 00000000..ac073f1d --- /dev/null +++ b/ext/psi_i_ext_util_mod.f90 @@ -0,0 +1,175 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_i_ext_util_mod + + use psb_base_mod, only : psb_ipk_ + ! + ! Hack size for HLL format. + ! + integer(psb_ipk_), parameter :: psb_hksz_def_ = 32 + integer(psb_ipk_), private, save :: psb_hksz = psb_hksz_def_ + logical, private, save :: psb_hll_use_vector = .true. +contains + + function psi_get_hksz() result(res) + implicit none + integer(psb_ipk_) :: res + res = psb_hksz + end function psi_get_hksz + + subroutine psi_set_hksz(size) + implicit none + integer(psb_ipk_), intent(in) :: size + if (size > 0) psb_hksz = size + end subroutine psi_set_hksz + + subroutine psi_set_hll_vector(val) + implicit none + logical, optional :: val + if (present(val)) then + psb_hll_use_vector = val + else + psb_hll_use_vector = .true. + end if + + end subroutine psi_set_hll_vector + + function psi_get_hll_vector() result(res) + implicit none + logical :: res + + res = psb_hll_use_vector + end function psi_get_hll_vector + + + ! + ! Compute offsets and allocation for DIAgonal storage. + ! Input: + ! nr,nc,nz,ia,ja: the matrix pattern in COO + ! Note: This routine is designed to be called + ! with either a full matrix or an horizontal stripe, + ! with the COO entries sorted in row major order, hence + ! it will handle the conversion of a strip, so it can + ! be used by both DIA and HDIA. In both cases NR and NC + ! *MUST* be the *GLOBAL* number of rows/columns, not those + ! of the strips, i.e. it must be that all entris in IA <=NR + ! and JA <= NC. + ! Output: + ! nd: number of nonzero diagonals + ! d: d(k) contains the index inside offset of diagonal k, + ! which is, if A(I,J) /= 0 then K=NR+J-I, or (optionally) 0. + ! *MUST* be allocated on the *global* size NR+NC-1 + ! + ! offset: for each of the ND nonzero diagonals, its offset J-I + ! + ! Notes: D and OFFSET together represent the set of diagonals; + ! D can be used outside to quickly find which entry of OFFSET + ! a given a(i,j) corresponds to, without doing a search. + ! + ! 1. Optionally init D vector to zeros + ! 2. Walk through the NZ pairs (I,J): + ! a. if it is a new diagonal add to a heap; + ! b. increase its population count stored in D(J-I+NR) + ! c. Keep track of maximum population count. + ! 3. Go through the ND diagonals, getting them K out of the heap in order: + ! a. Set offset(i) to K-NR == J-I + ! b. Set D(K) = i or 0 (depending on cleard) + ! + ! Setting to 0 allows to reuse this function in a loop in a dry run + ! to estimate the allocation size for HDIA; without settng to 0 we + ! would need to zero the whole vector, resulting + ! in a quadratic overall cost. Outside this subroutine, it is possible + ! to zero selectively the entres in D by using the indices in OFFSET. + ! + ! + subroutine psi_dia_offset_from_coo(nr,nc,nz,ia,ja,nd,d,offset,info,& + & initd,cleard) + use psb_base_mod + + implicit none + + integer(psb_ipk_), intent(in) :: nr, nc, nz, ia(:), ja(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_), intent(out) :: offset(:) + integer(psb_ipk_), intent(out) :: nd + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initd,cleard + + type(psb_i_heap) :: heap + integer(psb_ipk_) :: k,i,j,ir,ic, ndiag, id + logical :: initd_, cleard_ + character(len=20) :: name + + info = psb_success_ + initd_ = .true. + if (present(initd)) initd_ = initd + cleard_ = .false. + if (present(cleard)) cleard_ = cleard + + if (initd_) d(:) = 0 + + ndiag = nr+nc-1 + if (size(d) Date: Fri, 29 Sep 2023 13:54:58 +0200 Subject: [PATCH 005/110] Merge psblas-ext into psblas step 2: GPU formats and configry. --- Make.inc.in | 22 + Makefile | 17 +- config/pac.m4 | 249 ++++ configure | 497 +++++- configure.ac | 69 +- gpu/CUDA/Makefile | 38 + gpu/CUDA/psi_cuda_CopyCooToElg.cuh | 104 ++ gpu/CUDA/psi_cuda_CopyCooToHlg.cuh | 108 ++ gpu/CUDA/psi_cuda_c_CopyCooToElg.cu | 10 + gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu | 10 + gpu/CUDA/psi_cuda_common.cuh | 16 + gpu/CUDA/psi_cuda_d_CopyCooToElg.cu | 10 + gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu | 10 + gpu/CUDA/psi_cuda_s_CopyCooToElg.cu | 10 + gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu | 10 + gpu/CUDA/psi_cuda_z_CopyCooToElg.cu | 10 + gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu | 10 + gpu/Makefile | 134 ++ gpu/base_cusparse_mod.F90 | 117 ++ gpu/c_cusparse_mod.F90 | 305 ++++ gpu/ccusparse.c | 97 ++ gpu/cintrf.h | 51 + gpu/core_mod.f90 | 53 + gpu/cuda_util.c | 808 ++++++++++ gpu/cuda_util.h | 139 ++ gpu/cusparse_mod.F90 | 38 + gpu/cvectordev.c | 325 ++++ gpu/cvectordev.h | 81 + gpu/d_cusparse_mod.F90 | 305 ++++ gpu/dcusparse.c | 95 ++ gpu/diagdev.c | 291 ++++ gpu/diagdev.h | 95 ++ gpu/diagdev_mod.F90 | 231 +++ gpu/dnsdev.c | 383 +++++ gpu/dnsdev.h | 122 ++ gpu/dnsdev_mod.F90 | 275 ++++ gpu/dvectordev.c | 305 ++++ gpu/dvectordev.h | 78 + gpu/elldev.c | 773 ++++++++++ gpu/elldev.h | 183 +++ gpu/elldev_mod.F90 | 326 ++++ gpu/fcusparse.c | 77 + gpu/fcusparse.h | 70 + gpu/fcusparse_fct.h | 770 ++++++++++ gpu/hdiagdev.c | 425 ++++++ gpu/hdiagdev.h | 111 ++ gpu/hdiagdev_mod.F90 | 203 +++ gpu/hlldev.c | 615 ++++++++ gpu/hlldev.h | 161 ++ gpu/hlldev_mod.F90 | 273 ++++ gpu/impl/Makefile | 294 ++++ gpu/impl/psb_c_cp_csrg_from_coo.F90 | 62 + gpu/impl/psb_c_cp_csrg_from_fmt.F90 | 61 + gpu/impl/psb_c_cp_diag_from_coo.F90 | 64 + gpu/impl/psb_c_cp_elg_from_coo.F90 | 184 +++ gpu/impl/psb_c_cp_elg_from_fmt.F90 | 101 ++ gpu/impl/psb_c_cp_hdiag_from_coo.F90 | 73 + gpu/impl/psb_c_cp_hlg_from_coo.F90 | 198 +++ gpu/impl/psb_c_cp_hlg_from_fmt.F90 | 68 + gpu/impl/psb_c_cp_hybg_from_coo.F90 | 64 + gpu/impl/psb_c_cp_hybg_from_fmt.F90 | 62 + gpu/impl/psb_c_csrg_allocate_mnnz.F90 | 68 + gpu/impl/psb_c_csrg_csmm.F90 | 134 ++ gpu/impl/psb_c_csrg_csmv.F90 | 139 ++ gpu/impl/psb_c_csrg_from_gpu.F90 | 73 + gpu/impl/psb_c_csrg_inner_vect_sv.F90 | 136 ++ gpu/impl/psb_c_csrg_mold.F90 | 65 + gpu/impl/psb_c_csrg_reallocate_nz.F90 | 70 + gpu/impl/psb_c_csrg_scal.F90 | 73 + gpu/impl/psb_c_csrg_scals.F90 | 71 + gpu/impl/psb_c_csrg_to_gpu.F90 | 325 ++++ gpu/impl/psb_c_csrg_vect_mv.F90 | 125 ++ gpu/impl/psb_c_diag_csmv.F90 | 136 ++ gpu/impl/psb_c_diag_mold.F90 | 65 + gpu/impl/psb_c_diag_to_gpu.F90 | 74 + gpu/impl/psb_c_diag_vect_mv.F90 | 126 ++ gpu/impl/psb_c_dnsg_mat_impl.F90 | 461 ++++++ gpu/impl/psb_c_elg_allocate_mnnz.F90 | 113 ++ gpu/impl/psb_c_elg_asb.f90 | 65 + gpu/impl/psb_c_elg_csmm.F90 | 134 ++ gpu/impl/psb_c_elg_csmv.F90 | 136 ++ gpu/impl/psb_c_elg_csput.F90 | 248 +++ gpu/impl/psb_c_elg_from_gpu.F90 | 74 + gpu/impl/psb_c_elg_inner_vect_sv.F90 | 89 ++ gpu/impl/psb_c_elg_mold.F90 | 65 + gpu/impl/psb_c_elg_reallocate_nz.F90 | 79 + gpu/impl/psb_c_elg_scal.F90 | 78 + gpu/impl/psb_c_elg_scals.F90 | 73 + gpu/impl/psb_c_elg_to_gpu.F90 | 93 ++ gpu/impl/psb_c_elg_trim.f90 | 62 + gpu/impl/psb_c_elg_vect_mv.F90 | 131 ++ gpu/impl/psb_c_hdiag_csmv.F90 | 136 ++ gpu/impl/psb_c_hdiag_mold.F90 | 65 + gpu/impl/psb_c_hdiag_to_gpu.F90 | 86 ++ gpu/impl/psb_c_hdiag_vect_mv.F90 | 126 ++ gpu/impl/psb_c_hlg_allocate_mnnz.F90 | 71 + gpu/impl/psb_c_hlg_csmm.F90 | 132 ++ gpu/impl/psb_c_hlg_csmv.F90 | 135 ++ gpu/impl/psb_c_hlg_from_gpu.F90 | 76 + gpu/impl/psb_c_hlg_inner_vect_sv.F90 | 81 + gpu/impl/psb_c_hlg_mold.F90 | 64 + gpu/impl/psb_c_hlg_reallocate_nz.F90 | 67 + gpu/impl/psb_c_hlg_scal.F90 | 75 + gpu/impl/psb_c_hlg_scals.F90 | 73 + gpu/impl/psb_c_hlg_to_gpu.F90 | 68 + gpu/impl/psb_c_hlg_vect_mv.F90 | 129 ++ gpu/impl/psb_c_hybg_allocate_mnnz.F90 | 69 + gpu/impl/psb_c_hybg_csmm.F90 | 135 ++ gpu/impl/psb_c_hybg_csmv.F90 | 138 ++ gpu/impl/psb_c_hybg_inner_vect_sv.F90 | 138 ++ gpu/impl/psb_c_hybg_mold.F90 | 66 + gpu/impl/psb_c_hybg_reallocate_nz.F90 | 71 + gpu/impl/psb_c_hybg_scal.F90 | 76 + gpu/impl/psb_c_hybg_scals.F90 | 76 + gpu/impl/psb_c_hybg_to_gpu.F90 | 154 ++ gpu/impl/psb_c_hybg_vect_mv.F90 | 127 ++ gpu/impl/psb_c_mv_csrg_from_coo.F90 | 65 + gpu/impl/psb_c_mv_csrg_from_fmt.F90 | 63 + gpu/impl/psb_c_mv_diag_from_coo.F90 | 69 + gpu/impl/psb_c_mv_elg_from_coo.F90 | 61 + gpu/impl/psb_c_mv_elg_from_fmt.F90 | 99 ++ gpu/impl/psb_c_mv_hdiag_from_coo.F90 | 74 + gpu/impl/psb_c_mv_hlg_from_coo.F90 | 61 + gpu/impl/psb_c_mv_hlg_from_fmt.F90 | 62 + gpu/impl/psb_c_mv_hybg_from_coo.F90 | 65 + gpu/impl/psb_c_mv_hybg_from_fmt.F90 | 62 + gpu/impl/psb_d_cp_csrg_from_coo.F90 | 62 + gpu/impl/psb_d_cp_csrg_from_fmt.F90 | 61 + gpu/impl/psb_d_cp_diag_from_coo.F90 | 64 + gpu/impl/psb_d_cp_elg_from_coo.F90 | 184 +++ gpu/impl/psb_d_cp_elg_from_fmt.F90 | 101 ++ gpu/impl/psb_d_cp_hdiag_from_coo.F90 | 73 + gpu/impl/psb_d_cp_hlg_from_coo.F90 | 198 +++ gpu/impl/psb_d_cp_hlg_from_fmt.F90 | 68 + gpu/impl/psb_d_cp_hybg_from_coo.F90 | 64 + gpu/impl/psb_d_cp_hybg_from_fmt.F90 | 62 + gpu/impl/psb_d_csrg_allocate_mnnz.F90 | 68 + gpu/impl/psb_d_csrg_csmm.F90 | 134 ++ gpu/impl/psb_d_csrg_csmv.F90 | 139 ++ gpu/impl/psb_d_csrg_from_gpu.F90 | 73 + gpu/impl/psb_d_csrg_inner_vect_sv.F90 | 136 ++ gpu/impl/psb_d_csrg_mold.F90 | 65 + gpu/impl/psb_d_csrg_reallocate_nz.F90 | 70 + gpu/impl/psb_d_csrg_scal.F90 | 73 + gpu/impl/psb_d_csrg_scals.F90 | 71 + gpu/impl/psb_d_csrg_to_gpu.F90 | 325 ++++ gpu/impl/psb_d_csrg_vect_mv.F90 | 125 ++ gpu/impl/psb_d_diag_csmv.F90 | 136 ++ gpu/impl/psb_d_diag_mold.F90 | 65 + gpu/impl/psb_d_diag_to_gpu.F90 | 74 + gpu/impl/psb_d_diag_vect_mv.F90 | 126 ++ gpu/impl/psb_d_dnsg_mat_impl.F90 | 461 ++++++ gpu/impl/psb_d_elg_allocate_mnnz.F90 | 113 ++ gpu/impl/psb_d_elg_asb.f90 | 65 + gpu/impl/psb_d_elg_csmm.F90 | 134 ++ gpu/impl/psb_d_elg_csmv.F90 | 136 ++ gpu/impl/psb_d_elg_csput.F90 | 248 +++ gpu/impl/psb_d_elg_from_gpu.F90 | 74 + gpu/impl/psb_d_elg_inner_vect_sv.F90 | 89 ++ gpu/impl/psb_d_elg_mold.F90 | 65 + gpu/impl/psb_d_elg_reallocate_nz.F90 | 79 + gpu/impl/psb_d_elg_scal.F90 | 78 + gpu/impl/psb_d_elg_scals.F90 | 73 + gpu/impl/psb_d_elg_to_gpu.F90 | 93 ++ gpu/impl/psb_d_elg_trim.f90 | 62 + gpu/impl/psb_d_elg_vect_mv.F90 | 131 ++ gpu/impl/psb_d_hdiag_csmv.F90 | 136 ++ gpu/impl/psb_d_hdiag_mold.F90 | 65 + gpu/impl/psb_d_hdiag_to_gpu.F90 | 86 ++ gpu/impl/psb_d_hdiag_vect_mv.F90 | 126 ++ gpu/impl/psb_d_hlg_allocate_mnnz.F90 | 71 + gpu/impl/psb_d_hlg_csmm.F90 | 132 ++ gpu/impl/psb_d_hlg_csmv.F90 | 135 ++ gpu/impl/psb_d_hlg_from_gpu.F90 | 76 + gpu/impl/psb_d_hlg_inner_vect_sv.F90 | 81 + gpu/impl/psb_d_hlg_mold.F90 | 64 + gpu/impl/psb_d_hlg_reallocate_nz.F90 | 67 + gpu/impl/psb_d_hlg_scal.F90 | 75 + gpu/impl/psb_d_hlg_scals.F90 | 73 + gpu/impl/psb_d_hlg_to_gpu.F90 | 68 + gpu/impl/psb_d_hlg_vect_mv.F90 | 129 ++ gpu/impl/psb_d_hybg_allocate_mnnz.F90 | 69 + gpu/impl/psb_d_hybg_csmm.F90 | 135 ++ gpu/impl/psb_d_hybg_csmv.F90 | 138 ++ gpu/impl/psb_d_hybg_inner_vect_sv.F90 | 138 ++ gpu/impl/psb_d_hybg_mold.F90 | 66 + gpu/impl/psb_d_hybg_reallocate_nz.F90 | 71 + gpu/impl/psb_d_hybg_scal.F90 | 76 + gpu/impl/psb_d_hybg_scals.F90 | 76 + gpu/impl/psb_d_hybg_to_gpu.F90 | 154 ++ gpu/impl/psb_d_hybg_vect_mv.F90 | 127 ++ gpu/impl/psb_d_mv_csrg_from_coo.F90 | 65 + gpu/impl/psb_d_mv_csrg_from_fmt.F90 | 63 + gpu/impl/psb_d_mv_diag_from_coo.F90 | 69 + gpu/impl/psb_d_mv_elg_from_coo.F90 | 61 + gpu/impl/psb_d_mv_elg_from_fmt.F90 | 99 ++ gpu/impl/psb_d_mv_hdiag_from_coo.F90 | 74 + gpu/impl/psb_d_mv_hlg_from_coo.F90 | 61 + gpu/impl/psb_d_mv_hlg_from_fmt.F90 | 62 + gpu/impl/psb_d_mv_hybg_from_coo.F90 | 65 + gpu/impl/psb_d_mv_hybg_from_fmt.F90 | 62 + gpu/impl/psb_s_cp_csrg_from_coo.F90 | 62 + gpu/impl/psb_s_cp_csrg_from_fmt.F90 | 61 + gpu/impl/psb_s_cp_diag_from_coo.F90 | 64 + gpu/impl/psb_s_cp_elg_from_coo.F90 | 184 +++ gpu/impl/psb_s_cp_elg_from_fmt.F90 | 101 ++ gpu/impl/psb_s_cp_hdiag_from_coo.F90 | 73 + gpu/impl/psb_s_cp_hlg_from_coo.F90 | 198 +++ gpu/impl/psb_s_cp_hlg_from_fmt.F90 | 68 + gpu/impl/psb_s_cp_hybg_from_coo.F90 | 64 + gpu/impl/psb_s_cp_hybg_from_fmt.F90 | 62 + gpu/impl/psb_s_csrg_allocate_mnnz.F90 | 68 + gpu/impl/psb_s_csrg_csmm.F90 | 134 ++ gpu/impl/psb_s_csrg_csmv.F90 | 139 ++ gpu/impl/psb_s_csrg_from_gpu.F90 | 73 + gpu/impl/psb_s_csrg_inner_vect_sv.F90 | 136 ++ gpu/impl/psb_s_csrg_mold.F90 | 65 + gpu/impl/psb_s_csrg_reallocate_nz.F90 | 70 + gpu/impl/psb_s_csrg_scal.F90 | 73 + gpu/impl/psb_s_csrg_scals.F90 | 71 + gpu/impl/psb_s_csrg_to_gpu.F90 | 325 ++++ gpu/impl/psb_s_csrg_vect_mv.F90 | 125 ++ gpu/impl/psb_s_diag_csmv.F90 | 136 ++ gpu/impl/psb_s_diag_mold.F90 | 65 + gpu/impl/psb_s_diag_to_gpu.F90 | 74 + gpu/impl/psb_s_diag_vect_mv.F90 | 126 ++ gpu/impl/psb_s_dnsg_mat_impl.F90 | 461 ++++++ gpu/impl/psb_s_elg_allocate_mnnz.F90 | 113 ++ gpu/impl/psb_s_elg_asb.f90 | 65 + gpu/impl/psb_s_elg_csmm.F90 | 134 ++ gpu/impl/psb_s_elg_csmv.F90 | 136 ++ gpu/impl/psb_s_elg_csput.F90 | 248 +++ gpu/impl/psb_s_elg_from_gpu.F90 | 74 + gpu/impl/psb_s_elg_inner_vect_sv.F90 | 89 ++ gpu/impl/psb_s_elg_mold.F90 | 65 + gpu/impl/psb_s_elg_reallocate_nz.F90 | 79 + gpu/impl/psb_s_elg_scal.F90 | 78 + gpu/impl/psb_s_elg_scals.F90 | 73 + gpu/impl/psb_s_elg_to_gpu.F90 | 93 ++ gpu/impl/psb_s_elg_trim.f90 | 62 + gpu/impl/psb_s_elg_vect_mv.F90 | 131 ++ gpu/impl/psb_s_hdiag_csmv.F90 | 136 ++ gpu/impl/psb_s_hdiag_mold.F90 | 65 + gpu/impl/psb_s_hdiag_to_gpu.F90 | 86 ++ gpu/impl/psb_s_hdiag_vect_mv.F90 | 126 ++ gpu/impl/psb_s_hlg_allocate_mnnz.F90 | 71 + gpu/impl/psb_s_hlg_csmm.F90 | 132 ++ gpu/impl/psb_s_hlg_csmv.F90 | 135 ++ gpu/impl/psb_s_hlg_from_gpu.F90 | 76 + gpu/impl/psb_s_hlg_inner_vect_sv.F90 | 81 + gpu/impl/psb_s_hlg_mold.F90 | 64 + gpu/impl/psb_s_hlg_reallocate_nz.F90 | 67 + gpu/impl/psb_s_hlg_scal.F90 | 75 + gpu/impl/psb_s_hlg_scals.F90 | 73 + gpu/impl/psb_s_hlg_to_gpu.F90 | 68 + gpu/impl/psb_s_hlg_vect_mv.F90 | 129 ++ gpu/impl/psb_s_hybg_allocate_mnnz.F90 | 69 + gpu/impl/psb_s_hybg_csmm.F90 | 135 ++ gpu/impl/psb_s_hybg_csmv.F90 | 138 ++ gpu/impl/psb_s_hybg_inner_vect_sv.F90 | 138 ++ gpu/impl/psb_s_hybg_mold.F90 | 66 + gpu/impl/psb_s_hybg_reallocate_nz.F90 | 71 + gpu/impl/psb_s_hybg_scal.F90 | 76 + gpu/impl/psb_s_hybg_scals.F90 | 76 + gpu/impl/psb_s_hybg_to_gpu.F90 | 154 ++ gpu/impl/psb_s_hybg_vect_mv.F90 | 127 ++ gpu/impl/psb_s_mv_csrg_from_coo.F90 | 65 + gpu/impl/psb_s_mv_csrg_from_fmt.F90 | 63 + gpu/impl/psb_s_mv_diag_from_coo.F90 | 69 + gpu/impl/psb_s_mv_elg_from_coo.F90 | 61 + gpu/impl/psb_s_mv_elg_from_fmt.F90 | 99 ++ gpu/impl/psb_s_mv_hdiag_from_coo.F90 | 74 + gpu/impl/psb_s_mv_hlg_from_coo.F90 | 61 + gpu/impl/psb_s_mv_hlg_from_fmt.F90 | 62 + gpu/impl/psb_s_mv_hybg_from_coo.F90 | 65 + gpu/impl/psb_s_mv_hybg_from_fmt.F90 | 62 + gpu/impl/psb_z_cp_csrg_from_coo.F90 | 62 + gpu/impl/psb_z_cp_csrg_from_fmt.F90 | 61 + gpu/impl/psb_z_cp_diag_from_coo.F90 | 64 + gpu/impl/psb_z_cp_elg_from_coo.F90 | 184 +++ gpu/impl/psb_z_cp_elg_from_fmt.F90 | 101 ++ gpu/impl/psb_z_cp_hdiag_from_coo.F90 | 73 + gpu/impl/psb_z_cp_hlg_from_coo.F90 | 198 +++ gpu/impl/psb_z_cp_hlg_from_fmt.F90 | 68 + gpu/impl/psb_z_cp_hybg_from_coo.F90 | 64 + gpu/impl/psb_z_cp_hybg_from_fmt.F90 | 62 + gpu/impl/psb_z_csrg_allocate_mnnz.F90 | 68 + gpu/impl/psb_z_csrg_csmm.F90 | 134 ++ gpu/impl/psb_z_csrg_csmv.F90 | 139 ++ gpu/impl/psb_z_csrg_from_gpu.F90 | 73 + gpu/impl/psb_z_csrg_inner_vect_sv.F90 | 136 ++ gpu/impl/psb_z_csrg_mold.F90 | 65 + gpu/impl/psb_z_csrg_reallocate_nz.F90 | 70 + gpu/impl/psb_z_csrg_scal.F90 | 73 + gpu/impl/psb_z_csrg_scals.F90 | 71 + gpu/impl/psb_z_csrg_to_gpu.F90 | 325 ++++ gpu/impl/psb_z_csrg_vect_mv.F90 | 125 ++ gpu/impl/psb_z_diag_csmv.F90 | 136 ++ gpu/impl/psb_z_diag_mold.F90 | 65 + gpu/impl/psb_z_diag_to_gpu.F90 | 74 + gpu/impl/psb_z_diag_vect_mv.F90 | 126 ++ gpu/impl/psb_z_dnsg_mat_impl.F90 | 461 ++++++ gpu/impl/psb_z_elg_allocate_mnnz.F90 | 113 ++ gpu/impl/psb_z_elg_asb.f90 | 65 + gpu/impl/psb_z_elg_csmm.F90 | 134 ++ gpu/impl/psb_z_elg_csmv.F90 | 136 ++ gpu/impl/psb_z_elg_csput.F90 | 248 +++ gpu/impl/psb_z_elg_from_gpu.F90 | 74 + gpu/impl/psb_z_elg_inner_vect_sv.F90 | 89 ++ gpu/impl/psb_z_elg_mold.F90 | 65 + gpu/impl/psb_z_elg_reallocate_nz.F90 | 79 + gpu/impl/psb_z_elg_scal.F90 | 78 + gpu/impl/psb_z_elg_scals.F90 | 73 + gpu/impl/psb_z_elg_to_gpu.F90 | 93 ++ gpu/impl/psb_z_elg_trim.f90 | 62 + gpu/impl/psb_z_elg_vect_mv.F90 | 131 ++ gpu/impl/psb_z_hdiag_csmv.F90 | 136 ++ gpu/impl/psb_z_hdiag_mold.F90 | 65 + gpu/impl/psb_z_hdiag_to_gpu.F90 | 86 ++ gpu/impl/psb_z_hdiag_vect_mv.F90 | 126 ++ gpu/impl/psb_z_hlg_allocate_mnnz.F90 | 71 + gpu/impl/psb_z_hlg_csmm.F90 | 132 ++ gpu/impl/psb_z_hlg_csmv.F90 | 135 ++ gpu/impl/psb_z_hlg_from_gpu.F90 | 76 + gpu/impl/psb_z_hlg_inner_vect_sv.F90 | 81 + gpu/impl/psb_z_hlg_mold.F90 | 64 + gpu/impl/psb_z_hlg_reallocate_nz.F90 | 67 + gpu/impl/psb_z_hlg_scal.F90 | 75 + gpu/impl/psb_z_hlg_scals.F90 | 73 + gpu/impl/psb_z_hlg_to_gpu.F90 | 68 + gpu/impl/psb_z_hlg_vect_mv.F90 | 129 ++ gpu/impl/psb_z_hybg_allocate_mnnz.F90 | 69 + gpu/impl/psb_z_hybg_csmm.F90 | 135 ++ gpu/impl/psb_z_hybg_csmv.F90 | 138 ++ gpu/impl/psb_z_hybg_inner_vect_sv.F90 | 138 ++ gpu/impl/psb_z_hybg_mold.F90 | 66 + gpu/impl/psb_z_hybg_reallocate_nz.F90 | 71 + gpu/impl/psb_z_hybg_scal.F90 | 76 + gpu/impl/psb_z_hybg_scals.F90 | 76 + gpu/impl/psb_z_hybg_to_gpu.F90 | 154 ++ gpu/impl/psb_z_hybg_vect_mv.F90 | 127 ++ gpu/impl/psb_z_mv_csrg_from_coo.F90 | 65 + gpu/impl/psb_z_mv_csrg_from_fmt.F90 | 63 + gpu/impl/psb_z_mv_diag_from_coo.F90 | 69 + gpu/impl/psb_z_mv_elg_from_coo.F90 | 61 + gpu/impl/psb_z_mv_elg_from_fmt.F90 | 99 ++ gpu/impl/psb_z_mv_hdiag_from_coo.F90 | 74 + gpu/impl/psb_z_mv_hlg_from_coo.F90 | 61 + gpu/impl/psb_z_mv_hlg_from_fmt.F90 | 62 + gpu/impl/psb_z_mv_hybg_from_coo.F90 | 65 + gpu/impl/psb_z_mv_hybg_from_fmt.F90 | 62 + gpu/ivectordev.c | 182 +++ gpu/ivectordev.h | 64 + gpu/psb_base_vectordev_mod.F90 | 104 ++ gpu/psb_c_csrg_mat_mod.F90 | 393 +++++ gpu/psb_c_diag_mat_mod.F90 | 308 ++++ gpu/psb_c_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_c_elg_mat_mod.F90 | 483 ++++++ gpu/psb_c_gpu_vect_mod.F90 | 1989 +++++++++++++++++++++++++ gpu/psb_c_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_c_hlg_mat_mod.F90 | 398 +++++ gpu/psb_c_hybg_mat_mod.F90 | 306 ++++ gpu/psb_c_vectordev_mod.F90 | 390 +++++ gpu/psb_d_csrg_mat_mod.F90 | 393 +++++ gpu/psb_d_diag_mat_mod.F90 | 308 ++++ gpu/psb_d_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_d_elg_mat_mod.F90 | 483 ++++++ gpu/psb_d_gpu_vect_mod.F90 | 1989 +++++++++++++++++++++++++ gpu/psb_d_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_d_hlg_mat_mod.F90 | 398 +++++ gpu/psb_d_hybg_mat_mod.F90 | 306 ++++ gpu/psb_d_vectordev_mod.F90 | 390 +++++ gpu/psb_gpu_env_mod.F90 | 340 +++++ gpu/psb_gpu_mod.F90 | 89 ++ gpu/psb_i_csrg_mat_mod.F90 | 393 +++++ gpu/psb_i_diag_mat_mod.F90 | 308 ++++ gpu/psb_i_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_i_elg_mat_mod.F90 | 483 ++++++ gpu/psb_i_gpu_vect_mod.F90 | 1671 +++++++++++++++++++++ gpu/psb_i_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_i_hlg_mat_mod.F90 | 398 +++++ gpu/psb_i_hybg_mat_mod.F90 | 306 ++++ gpu/psb_i_vectordev_mod.F90 | 283 ++++ gpu/psb_s_csrg_mat_mod.F90 | 393 +++++ gpu/psb_s_diag_mat_mod.F90 | 308 ++++ gpu/psb_s_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_s_elg_mat_mod.F90 | 483 ++++++ gpu/psb_s_gpu_vect_mod.F90 | 1989 +++++++++++++++++++++++++ gpu/psb_s_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_s_hlg_mat_mod.F90 | 398 +++++ gpu/psb_s_hybg_mat_mod.F90 | 306 ++++ gpu/psb_s_vectordev_mod.F90 | 390 +++++ gpu/psb_vectordev_mod.f90 | 8 + gpu/psb_z_csrg_mat_mod.F90 | 393 +++++ gpu/psb_z_diag_mat_mod.F90 | 308 ++++ gpu/psb_z_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_z_elg_mat_mod.F90 | 483 ++++++ gpu/psb_z_gpu_vect_mod.F90 | 1989 +++++++++++++++++++++++++ gpu/psb_z_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_z_hlg_mat_mod.F90 | 398 +++++ gpu/psb_z_hybg_mat_mod.F90 | 306 ++++ gpu/psb_z_vectordev_mod.F90 | 390 +++++ gpu/s_cusparse_mod.F90 | 305 ++++ gpu/scusparse.c | 95 ++ gpu/svectordev.c | 304 ++++ gpu/svectordev.h | 78 + gpu/vectordev.c | 198 +++ gpu/vectordev.h | 90 ++ gpu/z_cusparse_mod.F90 | 305 ++++ gpu/zcusparse.c | 94 ++ gpu/zvectordev.c | 321 ++++ gpu/zvectordev.h | 91 ++ 412 files changed, 66802 insertions(+), 22 deletions(-) create mode 100644 gpu/CUDA/Makefile create mode 100644 gpu/CUDA/psi_cuda_CopyCooToElg.cuh create mode 100644 gpu/CUDA/psi_cuda_CopyCooToHlg.cuh create mode 100644 gpu/CUDA/psi_cuda_c_CopyCooToElg.cu create mode 100644 gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu create mode 100644 gpu/CUDA/psi_cuda_common.cuh create mode 100644 gpu/CUDA/psi_cuda_d_CopyCooToElg.cu create mode 100644 gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu create mode 100644 gpu/CUDA/psi_cuda_s_CopyCooToElg.cu create mode 100644 gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu create mode 100644 gpu/CUDA/psi_cuda_z_CopyCooToElg.cu create mode 100644 gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu create mode 100755 gpu/Makefile create mode 100644 gpu/base_cusparse_mod.F90 create mode 100644 gpu/c_cusparse_mod.F90 create mode 100644 gpu/ccusparse.c create mode 100644 gpu/cintrf.h create mode 100644 gpu/core_mod.f90 create mode 100644 gpu/cuda_util.c create mode 100644 gpu/cuda_util.h create mode 100644 gpu/cusparse_mod.F90 create mode 100644 gpu/cvectordev.c create mode 100644 gpu/cvectordev.h create mode 100644 gpu/d_cusparse_mod.F90 create mode 100644 gpu/dcusparse.c create mode 100644 gpu/diagdev.c create mode 100644 gpu/diagdev.h create mode 100644 gpu/diagdev_mod.F90 create mode 100644 gpu/dnsdev.c create mode 100644 gpu/dnsdev.h create mode 100644 gpu/dnsdev_mod.F90 create mode 100644 gpu/dvectordev.c create mode 100644 gpu/dvectordev.h create mode 100644 gpu/elldev.c create mode 100644 gpu/elldev.h create mode 100644 gpu/elldev_mod.F90 create mode 100644 gpu/fcusparse.c create mode 100644 gpu/fcusparse.h create mode 100644 gpu/fcusparse_fct.h create mode 100644 gpu/hdiagdev.c create mode 100644 gpu/hdiagdev.h create mode 100644 gpu/hdiagdev_mod.F90 create mode 100644 gpu/hlldev.c create mode 100644 gpu/hlldev.h create mode 100644 gpu/hlldev_mod.F90 create mode 100755 gpu/impl/Makefile create mode 100644 gpu/impl/psb_c_cp_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_cp_diag_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_elg_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_cp_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_cp_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_csrg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_c_csrg_csmm.F90 create mode 100644 gpu/impl/psb_c_csrg_csmv.F90 create mode 100644 gpu/impl/psb_c_csrg_from_gpu.F90 create mode 100644 gpu/impl/psb_c_csrg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_c_csrg_mold.F90 create mode 100644 gpu/impl/psb_c_csrg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_c_csrg_scal.F90 create mode 100644 gpu/impl/psb_c_csrg_scals.F90 create mode 100644 gpu/impl/psb_c_csrg_to_gpu.F90 create mode 100644 gpu/impl/psb_c_csrg_vect_mv.F90 create mode 100644 gpu/impl/psb_c_diag_csmv.F90 create mode 100644 gpu/impl/psb_c_diag_mold.F90 create mode 100644 gpu/impl/psb_c_diag_to_gpu.F90 create mode 100644 gpu/impl/psb_c_diag_vect_mv.F90 create mode 100644 gpu/impl/psb_c_dnsg_mat_impl.F90 create mode 100644 gpu/impl/psb_c_elg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_c_elg_asb.f90 create mode 100644 gpu/impl/psb_c_elg_csmm.F90 create mode 100644 gpu/impl/psb_c_elg_csmv.F90 create mode 100644 gpu/impl/psb_c_elg_csput.F90 create mode 100644 gpu/impl/psb_c_elg_from_gpu.F90 create mode 100644 gpu/impl/psb_c_elg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_c_elg_mold.F90 create mode 100644 gpu/impl/psb_c_elg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_c_elg_scal.F90 create mode 100644 gpu/impl/psb_c_elg_scals.F90 create mode 100644 gpu/impl/psb_c_elg_to_gpu.F90 create mode 100644 gpu/impl/psb_c_elg_trim.f90 create mode 100644 gpu/impl/psb_c_elg_vect_mv.F90 create mode 100644 gpu/impl/psb_c_hdiag_csmv.F90 create mode 100644 gpu/impl/psb_c_hdiag_mold.F90 create mode 100644 gpu/impl/psb_c_hdiag_to_gpu.F90 create mode 100644 gpu/impl/psb_c_hdiag_vect_mv.F90 create mode 100644 gpu/impl/psb_c_hlg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_c_hlg_csmm.F90 create mode 100644 gpu/impl/psb_c_hlg_csmv.F90 create mode 100644 gpu/impl/psb_c_hlg_from_gpu.F90 create mode 100644 gpu/impl/psb_c_hlg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_c_hlg_mold.F90 create mode 100644 gpu/impl/psb_c_hlg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_c_hlg_scal.F90 create mode 100644 gpu/impl/psb_c_hlg_scals.F90 create mode 100644 gpu/impl/psb_c_hlg_to_gpu.F90 create mode 100644 gpu/impl/psb_c_hlg_vect_mv.F90 create mode 100644 gpu/impl/psb_c_hybg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_c_hybg_csmm.F90 create mode 100644 gpu/impl/psb_c_hybg_csmv.F90 create mode 100644 gpu/impl/psb_c_hybg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_c_hybg_mold.F90 create mode 100644 gpu/impl/psb_c_hybg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_c_hybg_scal.F90 create mode 100644 gpu/impl/psb_c_hybg_scals.F90 create mode 100644 gpu/impl/psb_c_hybg_to_gpu.F90 create mode 100644 gpu/impl/psb_c_hybg_vect_mv.F90 create mode 100644 gpu/impl/psb_c_mv_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_mv_diag_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_elg_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_mv_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_mv_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_cp_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_cp_diag_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_elg_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_cp_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_cp_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_csrg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_d_csrg_csmm.F90 create mode 100644 gpu/impl/psb_d_csrg_csmv.F90 create mode 100644 gpu/impl/psb_d_csrg_from_gpu.F90 create mode 100644 gpu/impl/psb_d_csrg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_d_csrg_mold.F90 create mode 100644 gpu/impl/psb_d_csrg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_d_csrg_scal.F90 create mode 100644 gpu/impl/psb_d_csrg_scals.F90 create mode 100644 gpu/impl/psb_d_csrg_to_gpu.F90 create mode 100644 gpu/impl/psb_d_csrg_vect_mv.F90 create mode 100644 gpu/impl/psb_d_diag_csmv.F90 create mode 100644 gpu/impl/psb_d_diag_mold.F90 create mode 100644 gpu/impl/psb_d_diag_to_gpu.F90 create mode 100644 gpu/impl/psb_d_diag_vect_mv.F90 create mode 100644 gpu/impl/psb_d_dnsg_mat_impl.F90 create mode 100644 gpu/impl/psb_d_elg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_d_elg_asb.f90 create mode 100644 gpu/impl/psb_d_elg_csmm.F90 create mode 100644 gpu/impl/psb_d_elg_csmv.F90 create mode 100644 gpu/impl/psb_d_elg_csput.F90 create mode 100644 gpu/impl/psb_d_elg_from_gpu.F90 create mode 100644 gpu/impl/psb_d_elg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_d_elg_mold.F90 create mode 100644 gpu/impl/psb_d_elg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_d_elg_scal.F90 create mode 100644 gpu/impl/psb_d_elg_scals.F90 create mode 100644 gpu/impl/psb_d_elg_to_gpu.F90 create mode 100644 gpu/impl/psb_d_elg_trim.f90 create mode 100644 gpu/impl/psb_d_elg_vect_mv.F90 create mode 100644 gpu/impl/psb_d_hdiag_csmv.F90 create mode 100644 gpu/impl/psb_d_hdiag_mold.F90 create mode 100644 gpu/impl/psb_d_hdiag_to_gpu.F90 create mode 100644 gpu/impl/psb_d_hdiag_vect_mv.F90 create mode 100644 gpu/impl/psb_d_hlg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_d_hlg_csmm.F90 create mode 100644 gpu/impl/psb_d_hlg_csmv.F90 create mode 100644 gpu/impl/psb_d_hlg_from_gpu.F90 create mode 100644 gpu/impl/psb_d_hlg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_d_hlg_mold.F90 create mode 100644 gpu/impl/psb_d_hlg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_d_hlg_scal.F90 create mode 100644 gpu/impl/psb_d_hlg_scals.F90 create mode 100644 gpu/impl/psb_d_hlg_to_gpu.F90 create mode 100644 gpu/impl/psb_d_hlg_vect_mv.F90 create mode 100644 gpu/impl/psb_d_hybg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_d_hybg_csmm.F90 create mode 100644 gpu/impl/psb_d_hybg_csmv.F90 create mode 100644 gpu/impl/psb_d_hybg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_d_hybg_mold.F90 create mode 100644 gpu/impl/psb_d_hybg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_d_hybg_scal.F90 create mode 100644 gpu/impl/psb_d_hybg_scals.F90 create mode 100644 gpu/impl/psb_d_hybg_to_gpu.F90 create mode 100644 gpu/impl/psb_d_hybg_vect_mv.F90 create mode 100644 gpu/impl/psb_d_mv_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_mv_diag_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_elg_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_mv_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_mv_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_cp_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_cp_diag_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_elg_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_cp_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_cp_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_csrg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_s_csrg_csmm.F90 create mode 100644 gpu/impl/psb_s_csrg_csmv.F90 create mode 100644 gpu/impl/psb_s_csrg_from_gpu.F90 create mode 100644 gpu/impl/psb_s_csrg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_s_csrg_mold.F90 create mode 100644 gpu/impl/psb_s_csrg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_s_csrg_scal.F90 create mode 100644 gpu/impl/psb_s_csrg_scals.F90 create mode 100644 gpu/impl/psb_s_csrg_to_gpu.F90 create mode 100644 gpu/impl/psb_s_csrg_vect_mv.F90 create mode 100644 gpu/impl/psb_s_diag_csmv.F90 create mode 100644 gpu/impl/psb_s_diag_mold.F90 create mode 100644 gpu/impl/psb_s_diag_to_gpu.F90 create mode 100644 gpu/impl/psb_s_diag_vect_mv.F90 create mode 100644 gpu/impl/psb_s_dnsg_mat_impl.F90 create mode 100644 gpu/impl/psb_s_elg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_s_elg_asb.f90 create mode 100644 gpu/impl/psb_s_elg_csmm.F90 create mode 100644 gpu/impl/psb_s_elg_csmv.F90 create mode 100644 gpu/impl/psb_s_elg_csput.F90 create mode 100644 gpu/impl/psb_s_elg_from_gpu.F90 create mode 100644 gpu/impl/psb_s_elg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_s_elg_mold.F90 create mode 100644 gpu/impl/psb_s_elg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_s_elg_scal.F90 create mode 100644 gpu/impl/psb_s_elg_scals.F90 create mode 100644 gpu/impl/psb_s_elg_to_gpu.F90 create mode 100644 gpu/impl/psb_s_elg_trim.f90 create mode 100644 gpu/impl/psb_s_elg_vect_mv.F90 create mode 100644 gpu/impl/psb_s_hdiag_csmv.F90 create mode 100644 gpu/impl/psb_s_hdiag_mold.F90 create mode 100644 gpu/impl/psb_s_hdiag_to_gpu.F90 create mode 100644 gpu/impl/psb_s_hdiag_vect_mv.F90 create mode 100644 gpu/impl/psb_s_hlg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_s_hlg_csmm.F90 create mode 100644 gpu/impl/psb_s_hlg_csmv.F90 create mode 100644 gpu/impl/psb_s_hlg_from_gpu.F90 create mode 100644 gpu/impl/psb_s_hlg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_s_hlg_mold.F90 create mode 100644 gpu/impl/psb_s_hlg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_s_hlg_scal.F90 create mode 100644 gpu/impl/psb_s_hlg_scals.F90 create mode 100644 gpu/impl/psb_s_hlg_to_gpu.F90 create mode 100644 gpu/impl/psb_s_hlg_vect_mv.F90 create mode 100644 gpu/impl/psb_s_hybg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_s_hybg_csmm.F90 create mode 100644 gpu/impl/psb_s_hybg_csmv.F90 create mode 100644 gpu/impl/psb_s_hybg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_s_hybg_mold.F90 create mode 100644 gpu/impl/psb_s_hybg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_s_hybg_scal.F90 create mode 100644 gpu/impl/psb_s_hybg_scals.F90 create mode 100644 gpu/impl/psb_s_hybg_to_gpu.F90 create mode 100644 gpu/impl/psb_s_hybg_vect_mv.F90 create mode 100644 gpu/impl/psb_s_mv_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_mv_diag_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_elg_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_mv_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_mv_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_cp_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_cp_diag_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_elg_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_cp_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_cp_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_csrg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_z_csrg_csmm.F90 create mode 100644 gpu/impl/psb_z_csrg_csmv.F90 create mode 100644 gpu/impl/psb_z_csrg_from_gpu.F90 create mode 100644 gpu/impl/psb_z_csrg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_z_csrg_mold.F90 create mode 100644 gpu/impl/psb_z_csrg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_z_csrg_scal.F90 create mode 100644 gpu/impl/psb_z_csrg_scals.F90 create mode 100644 gpu/impl/psb_z_csrg_to_gpu.F90 create mode 100644 gpu/impl/psb_z_csrg_vect_mv.F90 create mode 100644 gpu/impl/psb_z_diag_csmv.F90 create mode 100644 gpu/impl/psb_z_diag_mold.F90 create mode 100644 gpu/impl/psb_z_diag_to_gpu.F90 create mode 100644 gpu/impl/psb_z_diag_vect_mv.F90 create mode 100644 gpu/impl/psb_z_dnsg_mat_impl.F90 create mode 100644 gpu/impl/psb_z_elg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_z_elg_asb.f90 create mode 100644 gpu/impl/psb_z_elg_csmm.F90 create mode 100644 gpu/impl/psb_z_elg_csmv.F90 create mode 100644 gpu/impl/psb_z_elg_csput.F90 create mode 100644 gpu/impl/psb_z_elg_from_gpu.F90 create mode 100644 gpu/impl/psb_z_elg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_z_elg_mold.F90 create mode 100644 gpu/impl/psb_z_elg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_z_elg_scal.F90 create mode 100644 gpu/impl/psb_z_elg_scals.F90 create mode 100644 gpu/impl/psb_z_elg_to_gpu.F90 create mode 100644 gpu/impl/psb_z_elg_trim.f90 create mode 100644 gpu/impl/psb_z_elg_vect_mv.F90 create mode 100644 gpu/impl/psb_z_hdiag_csmv.F90 create mode 100644 gpu/impl/psb_z_hdiag_mold.F90 create mode 100644 gpu/impl/psb_z_hdiag_to_gpu.F90 create mode 100644 gpu/impl/psb_z_hdiag_vect_mv.F90 create mode 100644 gpu/impl/psb_z_hlg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_z_hlg_csmm.F90 create mode 100644 gpu/impl/psb_z_hlg_csmv.F90 create mode 100644 gpu/impl/psb_z_hlg_from_gpu.F90 create mode 100644 gpu/impl/psb_z_hlg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_z_hlg_mold.F90 create mode 100644 gpu/impl/psb_z_hlg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_z_hlg_scal.F90 create mode 100644 gpu/impl/psb_z_hlg_scals.F90 create mode 100644 gpu/impl/psb_z_hlg_to_gpu.F90 create mode 100644 gpu/impl/psb_z_hlg_vect_mv.F90 create mode 100644 gpu/impl/psb_z_hybg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_z_hybg_csmm.F90 create mode 100644 gpu/impl/psb_z_hybg_csmv.F90 create mode 100644 gpu/impl/psb_z_hybg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_z_hybg_mold.F90 create mode 100644 gpu/impl/psb_z_hybg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_z_hybg_scal.F90 create mode 100644 gpu/impl/psb_z_hybg_scals.F90 create mode 100644 gpu/impl/psb_z_hybg_to_gpu.F90 create mode 100644 gpu/impl/psb_z_hybg_vect_mv.F90 create mode 100644 gpu/impl/psb_z_mv_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_mv_diag_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_elg_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_mv_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_mv_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_hybg_from_fmt.F90 create mode 100644 gpu/ivectordev.c create mode 100644 gpu/ivectordev.h create mode 100644 gpu/psb_base_vectordev_mod.F90 create mode 100644 gpu/psb_c_csrg_mat_mod.F90 create mode 100644 gpu/psb_c_diag_mat_mod.F90 create mode 100644 gpu/psb_c_dnsg_mat_mod.F90 create mode 100644 gpu/psb_c_elg_mat_mod.F90 create mode 100644 gpu/psb_c_gpu_vect_mod.F90 create mode 100644 gpu/psb_c_hdiag_mat_mod.F90 create mode 100644 gpu/psb_c_hlg_mat_mod.F90 create mode 100644 gpu/psb_c_hybg_mat_mod.F90 create mode 100644 gpu/psb_c_vectordev_mod.F90 create mode 100644 gpu/psb_d_csrg_mat_mod.F90 create mode 100644 gpu/psb_d_diag_mat_mod.F90 create mode 100644 gpu/psb_d_dnsg_mat_mod.F90 create mode 100644 gpu/psb_d_elg_mat_mod.F90 create mode 100644 gpu/psb_d_gpu_vect_mod.F90 create mode 100644 gpu/psb_d_hdiag_mat_mod.F90 create mode 100644 gpu/psb_d_hlg_mat_mod.F90 create mode 100644 gpu/psb_d_hybg_mat_mod.F90 create mode 100644 gpu/psb_d_vectordev_mod.F90 create mode 100644 gpu/psb_gpu_env_mod.F90 create mode 100644 gpu/psb_gpu_mod.F90 create mode 100644 gpu/psb_i_csrg_mat_mod.F90 create mode 100644 gpu/psb_i_diag_mat_mod.F90 create mode 100644 gpu/psb_i_dnsg_mat_mod.F90 create mode 100644 gpu/psb_i_elg_mat_mod.F90 create mode 100644 gpu/psb_i_gpu_vect_mod.F90 create mode 100644 gpu/psb_i_hdiag_mat_mod.F90 create mode 100644 gpu/psb_i_hlg_mat_mod.F90 create mode 100644 gpu/psb_i_hybg_mat_mod.F90 create mode 100644 gpu/psb_i_vectordev_mod.F90 create mode 100644 gpu/psb_s_csrg_mat_mod.F90 create mode 100644 gpu/psb_s_diag_mat_mod.F90 create mode 100644 gpu/psb_s_dnsg_mat_mod.F90 create mode 100644 gpu/psb_s_elg_mat_mod.F90 create mode 100644 gpu/psb_s_gpu_vect_mod.F90 create mode 100644 gpu/psb_s_hdiag_mat_mod.F90 create mode 100644 gpu/psb_s_hlg_mat_mod.F90 create mode 100644 gpu/psb_s_hybg_mat_mod.F90 create mode 100644 gpu/psb_s_vectordev_mod.F90 create mode 100644 gpu/psb_vectordev_mod.f90 create mode 100644 gpu/psb_z_csrg_mat_mod.F90 create mode 100644 gpu/psb_z_diag_mat_mod.F90 create mode 100644 gpu/psb_z_dnsg_mat_mod.F90 create mode 100644 gpu/psb_z_elg_mat_mod.F90 create mode 100644 gpu/psb_z_gpu_vect_mod.F90 create mode 100644 gpu/psb_z_hdiag_mat_mod.F90 create mode 100644 gpu/psb_z_hlg_mat_mod.F90 create mode 100644 gpu/psb_z_hybg_mat_mod.F90 create mode 100644 gpu/psb_z_vectordev_mod.F90 create mode 100644 gpu/s_cusparse_mod.F90 create mode 100644 gpu/scusparse.c create mode 100644 gpu/svectordev.c create mode 100644 gpu/svectordev.h create mode 100644 gpu/vectordev.c create mode 100644 gpu/vectordev.h create mode 100644 gpu/z_cusparse_mod.F90 create mode 100644 gpu/zcusparse.c create mode 100644 gpu/zvectordev.c create mode 100644 gpu/zvectordev.h diff --git a/Make.inc.in b/Make.inc.in index 1fa3179c..e28e9bee 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -67,6 +67,28 @@ UTILMODNAME=@UTILMODNAME@ CBINDLIBNAME=libpsb_cbind.a +GPUD=@GPUD@ +GPULD=@GPULD@ + +SPGPUDIR=@SPGPU_DIR@ +SPGPU_INCDIR=@SPGPU_INCDIR@ +SPGPU_LIBS=@SPGPU_LIBS@ +SPGPU_DEFINES=@SPGPU_DEFINES@ +SPGPU_INCLUDES=@SPGPU_INCLUDES@ + +CUDA_DIR=@CUDA_DIR@ +CUDA_DEFINES=@CUDA_DEFINES@ +CUDA_INCLUDES=@CUDA_INCLUDES@ +CUDA_LIBS=@CUDA_LIBS@ +CUDA_VERSION=@CUDA_VERSION@ +CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@ +NVCC=@CUDA_NVCC@ +CUDEFINES=@CUDEFINES@ + +.SUFFIXES: .cu +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $< + @PSBLASRULES@ diff --git a/Makefile b/Makefile index 4a79afce..49879270 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include Make.inc -all: dirs based precd kryld utild cbindd extd libd +all: dirs based precd kryld utild cbindd extd $(GPUD) libd @echo "=====================================" @echo "PSBLAS libraries Compilation Successful." @@ -12,17 +12,20 @@ dirs: precd: based utild: based kryld: precd -extd: based - +extd: based +gpud: extd cbindd: based precd kryld utild -libd: based precd kryld utild cbindd extd +libd: based precd kryld utild cbindd extd $(GPULD) $(MAKE) -C base lib $(MAKE) -C prec lib $(MAKE) -C krylov lib $(MAKE) -C util lib $(MAKE) -C cbind lib $(MAKE) -C ext lib +gpuld: gpud + $(MAKE) -C gpu lib + based: $(MAKE) -C base objs @@ -34,8 +37,10 @@ utild: $(MAKE) -C util objs cbindd: $(MAKE) -C cbind objs -extd: +extd: based $(MAKE) -C ext objs +gpud: based extd + $(MAKE) -C gpu objs install: all @@ -61,6 +66,7 @@ clean: $(MAKE) -C util clean $(MAKE) -C cbind clean $(MAKE) -C ext clean + $(MAKE) -C gpu clean check: all make check -C test/serial @@ -77,6 +83,7 @@ veryclean: cleanlib cd util && $(MAKE) veryclean cd cbind && $(MAKE) veryclean cd ext && $(MAKE) veryclean + cd gpu && $(MAKE) veryclean cd test/fileread && $(MAKE) clean cd test/pargen && $(MAKE) clean cd test/util && $(MAKE) clean diff --git a/config/pac.m4 b/config/pac.m4 index 69d2f863..7a9ee07e 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2018,3 +2018,252 @@ CPPFLAGS="$SAVE_CPPFLAGS"; ])dnl +dnl @synopsis PAC_CHECK_SPGPU +dnl +dnl Will try to find the spgpu library and headers. +dnl +dnl Will use $CC +dnl +dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND. +dnl Note : This file will be likely to induce the compiler to create a module file +dnl (for a module called conftest). +dnl Depending on the compiler flags, this could cause a conftest.mod file to appear +dnl in the present directory, or in another, or with another name. So be warned! +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN(PAC_CHECK_SPGPU, + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + if test "x$pac_cv_have_cuda" == "x"; then + PAC_CHECK_CUDA() + fi +dnl AC_MSG_NOTICE([From CUDA: $pac_cv_have_cuda ]) + if test "x$pac_cv_have_cuda" == "xyes"; then + AC_ARG_WITH(spgpu, AC_HELP_STRING([--with-spgpu=DIR], [Specify the directory for SPGPU library and includes.]), + [pac_cv_spgpudir=$withval], + [pac_cv_spgpudir='']) + + AC_LANG([C]) + if test "x$pac_cv_spgpudir" != "x"; then + LIBS="-L$pac_cv_spgpudir/lib $LIBS" + GPU_INCLUDES="-I$pac_cv_spgpudir/include" + CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + GPU_LIBDIR="-L$pac_cv_spgpudir/lib" + fi + AC_MSG_CHECKING([spgpu dir $pac_cv_spgpudir]) + AC_CHECK_HEADER([core.h], + [pac_gpu_header_ok=yes], + [pac_gpu_header_ok=no; GPU_INCLUDES=""]) + + if test "x$pac_gpu_header_ok" == "xyes" ; then + GPU_LIBS="-lspgpu $GPU_LIBDIR" + LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + AC_TRY_LINK_FUNC(spgpuCreate, + [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + AC_MSG_RESULT($pac_gpu_lib_ok) + if test "x$pac_cv_have_spgpu" == "xyes" ; then + AC_MSG_NOTICE([Have found SPGPU]) + SPGPULIBNAME="libpsbgpu.a"; + SPGPU_DIR="$pac_cv_spgpudir"; + SPGPU_DEFINES="-DHAVE_SPGPU"; + SPGPU_INCDIR="$SPGPU_DIR/include"; + SPGPU_INCLUDES="-I$SPGPU_INCDIR"; + SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib"; + LGPU=-lpsb_gpu + CUDA_DIR="$pac_cv_cuda_dir"; + CUDA_DEFINES="-DHAVE_CUDA"; + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES"; + CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ; + fi + fi +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + + + + +dnl @synopsis PAC_CHECK_CUDA +dnl +dnl Will try to find the cuda library and headers. +dnl +dnl Will use $CC +dnl +dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND. +dnl Note : This file will be likely to induce the compiler to create a module file +dnl (for a module called conftest). +dnl Depending on the compiler flags, this could cause a conftest.mod file to appear +dnl in the present directory, or in another, or with another name. So be warned! +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN(PAC_CHECK_CUDA, +[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the directory for CUDA library and includes.]), + [pac_cv_cuda_dir=$withval], + [pac_cv_cuda_dir='']) + +AC_LANG([C]) +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +AC_MSG_CHECKING([cuda dir $pac_cv_cuda_dir]) +AC_CHECK_HEADER([cuda_runtime.h], + [pac_cuda_header_ok=yes], + [pac_cuda_header_ok=no; CUDA_INCLUDES=""]) + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for cudaMemcpy in $CUDA_LIBS]) + AC_TRY_LINK_FUNC(cudaMemcpy, + [pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; ], + [pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS=""]) + AC_MSG_RESULT($pac_cuda_lib_ok) + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + +dnl @synopsis PAC_ARG_WITH_CUDACC +dnl +dnl Test for --with-cudacc="set_of_cc". +dnl +dnl Defines the CC to compile for +dnl +dnl +dnl Example use: +dnl +dnl PAC_ARG_WITH_CUDACC +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_ARG_WITH_CUDACC], +[ +AC_ARG_WITH(cudacc, +AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, for example, + --with-cudacc=30,35,37,50,60]), +[pac_cv_cudacc=$withval], +[pac_cv_cudacc='']) +]) + +AC_DEFUN(PAC_ARG_WITH_LIBRSB, + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + + AC_ARG_WITH(librsb, + AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example, + --with-librsb=/opt/packages/librsb]), + [pac_cv_librsb_dir=$withval], + [pac_cv_librsb_dir='']) + + if test "x$pac_cv_librsb_dir" != "x"; then + LIBS="-L$pac_cv_librsb_dir $LIBS" + RSB_INCLUDES="-I$pac_cv_librsb_dir" + # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + RSB_LIBDIR="-L$pac_cv_librsb_dir" + fi + #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir]) + AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h], + [pac_rsb_header_ok=yes], + [pac_rsb_header_ok=no; RSB_INCLUDES=""]) + + if test "x$pac_rsb_header_ok" == "xyes" ; then + RSB_LIBS="-lrsb $RSB_LIBDIR" + # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + # AC_TRY_LINK_FUNC(spgpuCreate, + # [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + # [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + # AC_MSG_RESULT($pac_gpu_lib_ok) + # if test "x$pac_cv_have_spgpu" == "xyes" ; then + # AC_MSG_NOTICE([Have found SPGPU]) + RSBLIBNAME="librsb.a"; + LIBRSB_DIR="$pac_cv_librsb_dir"; + # SPGPU_DEFINES="-DHAVE_SPGPU"; + LIBRSB_INCDIR="$LIBRSB_DIR"; + LIBRSB_INCLUDES="-I$LIBRSB_INCDIR"; + LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR"; + # CUDA_DIR="$pac_cv_cuda_dir"; + LIBRSB_DEFINES="-DHAVE_RSB"; + LRSB=-lpsb_rsb + # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES"; + CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES"; + fi +# fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +]) +dnl + +dnl @synopsis PAC_CHECK_CUDA_VERSION +dnl +dnl Will try to find the cuda version +dnl +dnl Will use $CC +dnl +dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND. +dnl Note : This file will be likely to induce the compiler to create a module file +dnl (for a module called conftest). +dnl Depending on the compiler flags, this could cause a conftest.mod file to appear +dnl in the present directory, or in another, or with another name. So be warned! +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN(PAC_CHECK_CUDA_VERSION, +[AC_LANG_PUSH([C]) +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_have_cuda" == "x"; then + PAC_CHECK_CUDA() +fi +if test "x$pac_cv_have_cuda" == "xyes"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for CUDA version]) + AC_LINK_IFELSE([AC_LANG_SOURCE([ +#include +#include + +int main(int argc, char *argv[]) +{ + printf("%d",CUDA_VERSION); + return(0); +} ])], + [pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`;], + [pac_cv_cuda_version="unknown";]) + + AC_MSG_RESULT($pac_cv_cuda_version) + fi +AC_LANG_POP([C]) +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + + diff --git a/configure b/configure index 752fe192..5c3444b3 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for PSBLAS 3.7.0. +# Generated by GNU Autoconf 2.71 for PSBLAS 3.8.1. # # Report bugs to . # @@ -611,8 +611,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='PSBLAS' PACKAGE_TARNAME='psblas' -PACKAGE_VERSION='3.7.0' -PACKAGE_STRING='PSBLAS 3.7.0' +PACKAGE_VERSION='3.8.1' +PACKAGE_STRING='PSBLAS 3.8.1' PACKAGE_BUGREPORT='https://github.com/sfilippone/psblas3/issues' PACKAGE_URL='' @@ -653,6 +653,23 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS +GPULD +GPUD +CUDEFINES +CUDA_NVCC +CUDA_SHORT_VERSION +CUDA_VERSION +CUDA_LIBS +CUDA_INCLUDES +CUDA_DEFINES +CUDA_DIR +EXTRALDLIBS +SPGPU_INCDIR +SPGPU_INCLUDES +SPGPU_DEFINES +SPGPU_DIR +SPGPU_LIBS +SPGPU_FLAGS METISINCFILE UTILLIBNAME METHDLIBNAME @@ -825,6 +842,9 @@ with_amd with_amddir with_amdincdir with_amdlibdir +with_cuda +with_spgpu +with_cudacc ' ac_precious_vars='build_alias host_alias @@ -1390,7 +1410,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures PSBLAS 3.7.0 to adapt to many kinds of systems. +\`configure' configures PSBLAS 3.8.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1457,7 +1477,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of PSBLAS 3.7.0:";; + short | recursive ) echo "Configuration of PSBLAS 3.8.1:";; esac cat <<\_ACEOF @@ -1523,6 +1543,11 @@ Optional Packages: --with-amddir=DIR Specify the directory for AMD library and includes. --with-amdincdir=DIR Specify the directory for AMD includes. --with-amdlibdir=DIR Specify the directory for AMD library. + --with-cuda=DIR Specify the directory for CUDA library and includes. + --with-spgpu=DIR Specify the directory for SPGPU library and + includes. + --with-cudacc A comma-separated list of CCs to compile to, for + example, --with-cudacc=30,35,37,50,60 Some influential environment variables: FC Fortran compiler command @@ -1607,7 +1632,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -PSBLAS configure 3.7.0 +PSBLAS configure 3.8.1 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -2291,7 +2316,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by PSBLAS $as_me 3.7.0, which was +It was created by PSBLAS $as_me 3.8.1, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3265,7 +3290,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu # VERSION is the file containing the PSBLAS version code # FIXME -psblas_cv_version="3.7.0" +psblas_cv_version="3.8.1" # A sample source file @@ -3280,7 +3305,8 @@ psblas_cv_version="3.7.0" documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + --with-spgpu=/path/to/spgpu FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -3294,7 +3320,8 @@ printf "%s\n" "$as_me: documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + --with-spgpu=/path/to/spgpu FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -6393,7 +6420,7 @@ fi # Define the identity of the package. PACKAGE='psblas' - VERSION='3.7.0' + VERSION='3.8.1' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -10610,6 +10637,434 @@ fi + +# Check whether --with-cuda was given. +if test ${with_cuda+y} +then : + withval=$with_cuda; pac_cv_cuda_dir=$withval +else $as_nop + pac_cv_cuda_dir='' +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else $as_nop + pac_cuda_header_ok=no; CUDA_INCLUDES="" +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cudaMemcpy (); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else $as_nop + pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + + +if test "x$pac_cv_have_cuda" == "xyes"; then + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_have_cuda" == "x"; then + +# Check whether --with-cuda was given. +if test ${with_cuda+y} +then : + withval=$with_cuda; pac_cv_cuda_dir=$withval +else $as_nop + pac_cv_cuda_dir='' +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else $as_nop + pac_cuda_header_ok=no; CUDA_INCLUDES="" +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cudaMemcpy (); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else $as_nop + pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + +fi +if test "x$pac_cv_have_cuda" == "xyes"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for CUDA version" >&5 +printf %s "checking for CUDA version... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include + +int main(int argc, char *argv) +{ + printf("%d",CUDA_VERSION); + return(0); +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`; +else $as_nop + pac_cv_cuda_version="unknown"; +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cv_cuda_version" >&5 +printf "%s\n" "$pac_cv_cuda_version" >&6; } + fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + +CUDA_VERSION="$pac_cv_cuda_version"; +CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); +SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + if test "x$pac_cv_have_cuda" == "x"; then + +# Check whether --with-cuda was given. +if test ${with_cuda+y} +then : + withval=$with_cuda; pac_cv_cuda_dir=$withval +else $as_nop + pac_cv_cuda_dir='' +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else $as_nop + pac_cuda_header_ok=no; CUDA_INCLUDES="" +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cudaMemcpy (); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else $as_nop + pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + + fi + if test "x$pac_cv_have_cuda" == "xyes"; then + +# Check whether --with-spgpu was given. +if test ${with_spgpu+y} +then : + withval=$with_spgpu; pac_cv_spgpudir=$withval +else $as_nop + pac_cv_spgpudir='' +fi + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "x$pac_cv_spgpudir" != "x"; then + LIBS="-L$pac_cv_spgpudir/lib $LIBS" + GPU_INCLUDES="-I$pac_cv_spgpudir/include" + CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + GPU_LIBDIR="-L$pac_cv_spgpudir/lib" + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking spgpu dir $pac_cv_spgpudir" >&5 +printf %s "checking spgpu dir $pac_cv_spgpudir... " >&6; } + ac_fn_c_check_header_compile "$LINENO" "core.h" "ac_cv_header_core_h" "$ac_includes_default" +if test "x$ac_cv_header_core_h" = xyes +then : + pac_gpu_header_ok=yes +else $as_nop + pac_gpu_header_ok=no; GPU_INCLUDES="" +fi + + + if test "x$pac_gpu_header_ok" == "xyes" ; then + GPU_LIBS="-lspgpu $GPU_LIBDIR" + LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for spgpuCreate in $GPU_LIBS" >&5 +printf %s "checking for spgpuCreate in $GPU_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char spgpuCreate (); +int +main (void) +{ +return spgpuCreate (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; +else $as_nop + pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_gpu_lib_ok" >&5 +printf "%s\n" "$pac_gpu_lib_ok" >&6; } + if test "x$pac_cv_have_spgpu" == "xyes" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Have found SPGPU" >&5 +printf "%s\n" "$as_me: Have found SPGPU" >&6;} + SPGPULIBNAME="libpsbgpu.a"; + SPGPU_DIR="$pac_cv_spgpudir"; + SPGPU_DEFINES="-DHAVE_SPGPU"; + SPGPU_INCDIR="$SPGPU_DIR/include"; + SPGPU_INCLUDES="-I$SPGPU_INCDIR"; + SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib"; + LGPU=-lpsb_gpu + CUDA_DIR="$pac_cv_cuda_dir"; + CUDA_DEFINES="-DHAVE_CUDA"; + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES"; + CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ; + fi + fi +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + +if test "x$pac_cv_have_spgpu" == "xyes" ; then + GPUD=gpud; + GPULD=gpuld; + EXTRALDLIBS="-lstdc++"; +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $GPUD $GPULD" >&5 +printf "%s\n" "$as_me: At this point GPUTARGET is $GPUD $GPULD" >&6;} + + + +# Check whether --with-cudacc was given. +if test ${with_cudacc+y} +then : + withval=$with_cudacc; pac_cv_cudacc=$withval +else $as_nop + pac_cv_cudacc='' +fi + + +if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="30,35,37,50,60"; +fi +CUDEFINES=""; +for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` +do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; +done +if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" +fi + +fi + +if test "x$pac_cv_ipk_size" != "x4"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5 +printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;} + GPUD=""; + GPULD=""; + CUDEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; +fi + + + + ############################################################################### # Library target directory and archive files. ############################################################################### @@ -10687,6 +11142,22 @@ FDEFINES=$(PSBFDEFINES) + + + + + + + + + + + + + + + + @@ -11262,7 +11733,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by PSBLAS $as_me 3.7.0, which was +This file was extended by PSBLAS $as_me 3.8.1, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -11321,7 +11792,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -PSBLAS config.status 3.7.0 +PSBLAS config.status 3.8.1 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index d8a02a50..4b7f82e1 100755 --- a/configure.ac +++ b/configure.ac @@ -36,11 +36,11 @@ dnl NOTE : There is no cross compilation support. ############################################################################### # NOTE: the literal for version (the second argument to AC_INIT should be a literal!) -AC_INIT([PSBLAS],3.7.0, [https://github.com/sfilippone/psblas3/issues]) +AC_INIT([PSBLAS],3.8.1, [https://github.com/sfilippone/psblas3/issues]) # VERSION is the file containing the PSBLAS version code # FIXME -psblas_cv_version="3.7.0" +psblas_cv_version="3.8.1" # A sample source file AC_CONFIG_SRCDIR([base/modules/psb_base_mod.f90]) @@ -56,7 +56,8 @@ AC_MSG_NOTICE([ documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + [ --with-spgpu=/path/to/spgpu] FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -790,6 +791,50 @@ fi +PAC_CHECK_CUDA() + +if test "x$pac_cv_have_cuda" == "xyes"; then + +PAC_CHECK_CUDA_VERSION() +CUDA_VERSION="$pac_cv_cuda_version"; +CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); +PAC_CHECK_SPGPU() +if test "x$pac_cv_have_spgpu" == "xyes" ; then + GPUD=gpud; + GPULD=gpuld; + EXTRALDLIBS="-lstdc++"; +fi +AC_MSG_NOTICE([At this point GPUTARGET is $GPUD $GPULD]) + +PAC_ARG_WITH_CUDACC() +if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="30,35,37,50,60"; +fi +CUDEFINES=""; +for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` +do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; +done +if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" +fi + +fi + +if test "x$pac_cv_ipk_size" != "x4"; then + AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU]) + GPUD=""; + GPULD=""; + CUDEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; +fi + + + + ############################################################################### # Library target directory and archive files. ############################################################################### @@ -871,7 +916,23 @@ AC_SUBST(PRECLIBNAME) AC_SUBST(METHDLIBNAME) AC_SUBST(UTILLIBNAME) AC_SUBST(METISINCFILE) - +AC_SUBST(SPGPU_FLAGS) +AC_SUBST(SPGPU_LIBS) +AC_SUBST(SPGPU_DIR) +AC_SUBST(SPGPU_DEFINES) +AC_SUBST(SPGPU_INCLUDES) +AC_SUBST(SPGPU_INCDIR) +AC_SUBST(EXTRALDLIBS) +AC_SUBST(CUDA_DIR) +AC_SUBST(CUDA_DEFINES) +AC_SUBST(CUDA_INCLUDES) +AC_SUBST(CUDA_LIBS) +AC_SUBST(CUDA_VERSION) +AC_SUBST(CUDA_SHORT_VERSION) +AC_SUBST(CUDA_NVCC) +AC_SUBST(CUDEFINES) +AC_SUBST(GPUD) +AC_SUBST(GPULD) ############################################################################### # the following files will be created by Automake diff --git a/gpu/CUDA/Makefile b/gpu/CUDA/Makefile new file mode 100644 index 00000000..a1f9d48b --- /dev/null +++ b/gpu/CUDA/Makefile @@ -0,0 +1,38 @@ +TOPDIR=../.. +include $(TOPDIR)/Make.inc +# +# Libraries used +# +PSBLIBDIR=$(PSBLASDIR)/lib/ +PSBINCDIR=$(PSBLASDIR)/include +LIBDIR=$(TOPDIR)/lib +INCDIR=$(TOPDIR)/include +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. +LIBNAME=libpsb_gpu.a + + + +CUDAOBJS=psi_cuda_c_CopyCooToElg.o psi_cuda_c_CopyCooToHlg.o \ +psi_cuda_d_CopyCooToElg.o psi_cuda_d_CopyCooToHlg.o \ +psi_cuda_s_CopyCooToElg.o psi_cuda_s_CopyCooToHlg.o \ +psi_cuda_z_CopyCooToElg.o psi_cuda_z_CopyCooToHlg.o + + + +objs: $(CUDAOBJS) + +lib: objs + ar cur ../$(LIBNAME) $(CUDAOBJS) + +$(CUDAOBJS): psi_cuda_common.cuh psi_cuda_CopyCooToElg.cuh psi_cuda_CopyCooToHlg.cuh + +clean: + /bin/rm -f $(CUDAOBJS) diff --git a/gpu/CUDA/psi_cuda_CopyCooToElg.cuh b/gpu/CUDA/psi_cuda_CopyCooToElg.cuh new file mode 100644 index 00000000..10a81a36 --- /dev/null +++ b/gpu/CUDA/psi_cuda_CopyCooToElg.cuh @@ -0,0 +1,104 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" +#include "psi_cuda_common.cuh" + + +#undef GEN_PSI_FUNC_NAME +#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToElg) + +#define THREAD_BLOCK 256 + +#ifdef __cplusplus +extern "C" { +#endif + + + void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, + int baseIdx, int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM); + + +#ifdef __cplusplus +} +#endif + + + + + +__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza, + int baseIdx, int hacksz, int ldv, int nzm, + int *rS, int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ + int ir, k, ipnt, rsz,jc; + int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + int i=ii+ki; + int idval=0; + + if (ki >= nrws) return; + if (i >= nr) return; + + ipnt=devIdisp[i]; + rsz=rS[i]; + ir = i; + for (k=0; kcurrentStream >>>(i,nrws, nr, nza, baseIdx, hacksz, ldv, nzm, + rS,devIdisp,devJa,devVal,idiag, rP,cM); + +} + + + + +void +GEN_PSI_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ int i,j,k, nrws; + //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + + //fprintf(stderr,"Loop on j: %d\n",j); + for (i=0; i +#include + +#include "cintrf.h" +#include "vectordev.h" +#include "psi_cuda_common.cuh" + + +#undef GEN_PSI_FUNC_NAME +#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToHlg) + +#define THREAD_BLOCK 256 + +#ifdef __cplusplus +extern "C" { +#endif + +void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, + int noffs, int isz, int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM); + + + +#ifdef __cplusplus +} +#endif + + +__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ + int ir, k, ipnt, rsz,jc; + int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + int i=ii+ki; + + if (ki >= nrws) return; + + + if (icurrentStream >>>(i,nrws,nr, nza, baseIdx, hacksz, noffs, isz, + rS,hackOffs,devIdisp,devJa,devVal,idiag,rP,cM); + +} + + +void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ int i, nrws; + //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + //fprintf(stderr,"Loop on j: %d\n",j); + for (i=0; i +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL c +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu b/gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu new file mode 100644 index 00000000..f2b5c86d --- /dev/null +++ b/gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL c +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/gpu/CUDA/psi_cuda_common.cuh b/gpu/CUDA/psi_cuda_common.cuh new file mode 100644 index 00000000..12d81f03 --- /dev/null +++ b/gpu/CUDA/psi_cuda_common.cuh @@ -0,0 +1,16 @@ +#pragma once + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) +#define MIN(A,B) ( (A)<(B) ? (A) : (B) ) +#define SQUARE(x) ((x)*(x)) +#define GET_ADDR(a,ix,iy,nc) a[(nc)*(ix)+(iy)] +#define GET_VAL(a,ix,iy,nc) (GET_ADDR(a,ix,iy,nc)) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } + +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +#endif diff --git a/gpu/CUDA/psi_cuda_d_CopyCooToElg.cu b/gpu/CUDA/psi_cuda_d_CopyCooToElg.cu new file mode 100644 index 00000000..f306ffe1 --- /dev/null +++ b/gpu/CUDA/psi_cuda_d_CopyCooToElg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE double +#define TYPE_SYMBOL d +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu b/gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu new file mode 100644 index 00000000..9c0e371e --- /dev/null +++ b/gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE double +#define TYPE_SYMBOL d +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/gpu/CUDA/psi_cuda_s_CopyCooToElg.cu b/gpu/CUDA/psi_cuda_s_CopyCooToElg.cu new file mode 100644 index 00000000..76e10de1 --- /dev/null +++ b/gpu/CUDA/psi_cuda_s_CopyCooToElg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE float +#define TYPE_SYMBOL s +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu b/gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu new file mode 100644 index 00000000..c2d76c0a --- /dev/null +++ b/gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE float +#define TYPE_SYMBOL s +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/gpu/CUDA/psi_cuda_z_CopyCooToElg.cu b/gpu/CUDA/psi_cuda_z_CopyCooToElg.cu new file mode 100644 index 00000000..a57ad637 --- /dev/null +++ b/gpu/CUDA/psi_cuda_z_CopyCooToElg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL z +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu b/gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu new file mode 100644 index 00000000..2ff9b869 --- /dev/null +++ b/gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL z +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/gpu/Makefile b/gpu/Makefile new file mode 100755 index 00000000..16e9c084 --- /dev/null +++ b/gpu/Makefile @@ -0,0 +1,134 @@ +include ../Make.inc +# +# Libraries used +# +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +PSBLAS_LIB= -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) +LIBNAME=libpsb_gpu.a + + +FOBJS=cusparse_mod.o base_cusparse_mod.o \ + s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o \ + psb_vectordev_mod.o core_mod.o \ + psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\ + psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \ + elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \ + psb_i_gpu_vect_mod.o \ + psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ + psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ + psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ + psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ + psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ + psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ + psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ + psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ + psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ + psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ + psb_gpu_env_mod.o psb_gpu_mod.o \ + psb_d_diag_mat_mod.o\ + psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ + psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ + psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o \ + dnsdev_mod.o + +COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\ + svectordev.o dvectordev.o cvectordev.o zvectordev.o cuda_util.o \ + fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o + +OBJS=$(COBJS) $(FOBJS) + +lib: objs + +objs: $(OBJS) iobjs cudaobjs + /bin/cp -p *$(.mod) $(MODDIR) + /bin/cp -p *.h $(INCDIR) + +lib: ilib cudalib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o +psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o: psb_i_gpu_vect_mod.o +psb_i_gpu_vect_mod.o : psb_vectordev_mod.o psb_gpu_env_mod.o +cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o +s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o +psb_d_hlg_mat_mod.o: hlldev_mod.o psb_d_gpu_vect_mod.o psb_gpu_env_mod.o +psb_d_elg_mat_mod.o: elldev_mod.o psb_d_gpu_vect_mod.o +psb_d_diag_mat_mod.o: diagdev_mod.o psb_d_gpu_vect_mod.o +psb_d_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_gpu_vect_mod.o +psb_s_dnsg_mat_mod.o: dnsdev_mod.o psb_s_gpu_vect_mod.o +psb_d_dnsg_mat_mod.o: dnsdev_mod.o psb_d_gpu_vect_mod.o +psb_c_dnsg_mat_mod.o: dnsdev_mod.o psb_c_gpu_vect_mod.o +psb_z_dnsg_mat_mod.o: dnsdev_mod.o psb_z_gpu_vect_mod.o +psb_s_hlg_mat_mod.o: hlldev_mod.o psb_s_gpu_vect_mod.o psb_gpu_env_mod.o +psb_s_elg_mat_mod.o: elldev_mod.o psb_s_gpu_vect_mod.o +psb_s_diag_mat_mod.o: diagdev_mod.o psb_s_gpu_vect_mod.o +psb_s_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_gpu_vect_mod.o +psb_s_csrg_mat_mod.o psb_s_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_csrg_mat_mod.o psb_d_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_hlg_mat_mod.o: hlldev_mod.o psb_z_gpu_vect_mod.o psb_gpu_env_mod.o +psb_z_elg_mat_mod.o: elldev_mod.o psb_z_gpu_vect_mod.o +psb_c_hlg_mat_mod.o: hlldev_mod.o psb_c_gpu_vect_mod.o psb_gpu_env_mod.o +psb_c_elg_mat_mod.o: elldev_mod.o psb_c_gpu_vect_mod.o +psb_c_csrg_mat_mod.o psb_c_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_csrg_mat_mod.o psb_z_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o +psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o +vectordev.o: cuda_util.o vectordev.h +elldev.o: elldev.c +dnsdev.o: dnsdev.c +fcusparse.h elldev.c: elldev.h vectordev.h +fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse.h +fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse_fct.h +svectordev.o: svectordev.h vectordev.h +dvectordev.o: dvectordev.h vectordev.h +cvectordev.o: cvectordev.h vectordev.h +zvectordev.o: zvectordev.h vectordev.h +psb_gpu_env_mod.o: base_cusparse_mod.o +psb_gpu_mod.o: psb_gpu_env_mod.o psb_i_gpu_vect_mod.o\ + psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ + psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ + psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ + psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ + psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ + psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ + psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ + psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ + psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ + psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ + psb_d_diag_mat_mod.o \ + psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ + psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ + psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o + +iobjs: $(FOBJS) + $(MAKE) -C impl objs +cudaobjs: $(FOBJS) + $(MAKE) -C CUDA objs + +ilib: objs + $(MAKE) -C impl lib LIBNAME=$(LIBNAME) +cudalib: objs + $(MAKE) -C CUDA lib LIBNAME=$(LIBNAME) + +clean: cclean iclean cudaclean + /bin/rm -f $(FOBJS) *$(.mod) *.a + +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean +cudaclean: + $(MAKE) -C CUDA clean + +veryclean: clean diff --git a/gpu/base_cusparse_mod.F90 b/gpu/base_cusparse_mod.F90 new file mode 100644 index 00000000..9f5628be --- /dev/null +++ b/gpu/base_cusparse_mod.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module base_cusparse_mod + use iso_c_binding + ! Interface to CUSPARSE. + + enum, bind(c) + enumerator cusparse_status_success + enumerator cusparse_status_not_initialized + enumerator cusparse_status_alloc_failed + enumerator cusparse_status_invalid_value + enumerator cusparse_status_arch_mismatch + enumerator cusparse_status_mapping_error + enumerator cusparse_status_execution_failed + enumerator cusparse_status_internal_error + enumerator cusparse_status_matrix_type_not_supported + end enum + + enum, bind(c) + enumerator cusparse_matrix_type_general + enumerator cusparse_matrix_type_symmetric + enumerator cusparse_matrix_type_hermitian + enumerator cusparse_matrix_type_triangular + end enum + + enum, bind(c) + enumerator cusparse_fill_mode_lower + enumerator cusparse_fill_mode_upper + end enum + + enum, bind(c) + enumerator cusparse_diag_type_non_unit + enumerator cusparse_diag_type_unit + end enum + + enum, bind(c) + enumerator cusparse_index_base_zero + enumerator cusparse_index_base_one + end enum + + enum, bind(c) + enumerator cusparse_operation_non_transpose + enumerator cusparse_operation_transpose + enumerator cusparse_operation_conjugate_transpose + end enum + + enum, bind(c) + enumerator cusparse_direction_row + enumerator cusparse_direction_column + end enum + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface + function FcusparseCreate() & + & bind(c,name="FcusparseCreate") result(res) + use iso_c_binding + integer(c_int) :: res + end function FcusparseCreate + end interface + + interface + function FcusparseDestroy() & + & bind(c,name="FcusparseDestroy") result(res) + use iso_c_binding + integer(c_int) :: res + end function FcusparseDestroy + end interface + +contains + + function initFcusparse() result(res) + implicit none + integer(c_int) :: res + + res = FcusparseCreate() + end function initFcusparse + + function closeFcusparse() result(res) + implicit none + integer(c_int) :: res + res = FcusparseDestroy() + end function closeFcusparse + +#endif +end module base_cusparse_mod diff --git a/gpu/c_cusparse_mod.F90 b/gpu/c_cusparse_mod.F90 new file mode 100644 index 00000000..e7d37173 --- /dev/null +++ b/gpu/c_cusparse_mod.F90 @@ -0,0 +1,305 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module c_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: c_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type c_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: c_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type c_Hmat +#endif + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface CSRGDeviceFree + function c_CSRGDeviceFree(Mat) & + & bind(c,name="c_CSRGDeviceFree") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function c_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function c_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function c_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function c_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatIndexBase + end interface + + interface CSRGDeviceCsrsmAnalysis + function c_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGDeviceCsrsmAnalysis + end interface + + interface CSRGDeviceAlloc + function c_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="c_CSRGDeviceAlloc") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function c_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function c_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="c_CSRGDeviceGetParms") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function c_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function c_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spsvCSRGDevice") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function c_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spmvCSRGDevice") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function c_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_CSRGHost2Device") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function c_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_CSRGDevice2Host") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <=10 + interface HYBGDeviceAlloc + function c_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="c_HYBGDeviceAlloc") result(res) + use iso_c_binding + import c_hmat + type(c_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function c_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function c_HYBGDeviceFree(Mat) & + & bind(c,name="c_HYBGDeviceFree") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int) :: res + end function c_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function c_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function c_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function c_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function c_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function c_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="c_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int) :: res + end function c_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function c_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spsvHYBGDevice") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function c_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spmvHYBGDevice") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function c_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_HYBGHost2Device") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_HYBGHost2Device + end interface +#endif + +#endif + +end module c_cusparse_mod diff --git a/gpu/ccusparse.c b/gpu/ccusparse.c new file mode 100644 index 00000000..6f1cfdb3 --- /dev/null +++ b/gpu/ccusparse.c @@ -0,0 +1,97 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + +/* Single precision complex */ +#define TYPE float complex +#define CUSPARSE_BASE_TYPE CUDA_C_32F +#define T_CSRGDeviceMat c_CSRGDeviceMat +#define T_Cmat c_Cmat +#define T_spmvCSRGDevice c_spmvCSRGDevice +#define T_spsvCSRGDevice c_spsvCSRGDevice +#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc +#define T_CSRGDeviceFree c_CSRGDeviceFree +#define T_CSRGHost2Device c_CSRGHost2Device +#define T_CSRGDevice2Host c_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseCcsrmv +#define cusparseTcsrsv_solve cusparseCcsrsv_solve +#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve + +#else + +#define T_HYBGDeviceMat c_HYBGDeviceMat +#define T_Hmat c_Hmat +#define T_HYBGDeviceFree c_HYBGDeviceFree +#define T_spmvHYBGDevice c_spmvHYBGDevice +#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice c_spsvHYBGDevice +#define T_HYBGHost2Device c_HYBGHost2Device +#define cusparseThybmv cusparseChybmv +#define cusparseThybsv_solve cusparseChybsv_solve +#define cusparseThybsv_analysis cusparseChybsv_analysis +#define cusparseTcsr2hyb cusparseCcsr2hyb +#endif + +#include "fcusparse_fct.h" + +#endif diff --git a/gpu/cintrf.h b/gpu/cintrf.h new file mode 100644 index 00000000..1a3528aa --- /dev/null +++ b/gpu/cintrf.h @@ -0,0 +1,51 @@ + /* Parallel Sparse BLAS SPGPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _CINTRF_H_ +#define _CINTRF_H_ + +#include +#include + +#if defined(HAVE_SPGPU) && defined(HAVE_CUDA) +#include "core.h" +#include "cuda_util.h" +#include "vector.h" +#include "vectordev.h" + +#define ELL_PITCH_ALIGN_S 32 +#define ELL_PITCH_ALIGN_D 16 + + +#endif + +#endif diff --git a/gpu/core_mod.f90 b/gpu/core_mod.f90 new file mode 100644 index 00000000..d30f8a99 --- /dev/null +++ b/gpu/core_mod.f90 @@ -0,0 +1,53 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module core_mod + use iso_c_binding + + integer(c_int), parameter :: spgpu_type_int = 0 + integer(c_int), parameter :: spgpu_type_float = 1 + integer(c_int), parameter :: spgpu_type_double = 2 + integer(c_int), parameter :: spgpu_type_complex_float = 3 + integer(c_int), parameter :: spgpu_type_complex_double = 4 + integer(c_int), parameter :: spgpu_success = 0 + integer(c_int), parameter :: spgpu_unsupported = 1 + integer(c_int), parameter :: spgpu_unspecified = 2 + integer(c_int), parameter :: spgpu_outofmem = 3 + + interface + subroutine psb_cudaSync() & + & bind(c,name='cudaSync') + use iso_c_binding + end subroutine psb_cudaSync + end interface + +end module core_mod diff --git a/gpu/cuda_util.c b/gpu/cuda_util.c new file mode 100644 index 00000000..63c38b53 --- /dev/null +++ b/gpu/cuda_util.c @@ -0,0 +1,808 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include "cuda_util.h" + +#if defined(HAVE_CUDA) + + +static int hasUVA=-1; +static struct cudaDeviceProp *prop=NULL; +static spgpuHandle_t psb_gpu_handle = NULL; +static cublasHandle_t psb_cublas_handle = NULL; + + +int allocRemoteBuffer(void** buffer, int count) +{ + cudaError_t err = cudaMalloc(buffer, count); + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA allocRemoteBuffer for %d bytes Error: %s \n", + count, cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int hostRegisterMapped(void *pointer, long size) +{ + cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA hostRegisterMapped Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int getDevicePointer(void **d_p, void * h_p) +{ + cudaError_t err = cudaHostGetDevicePointer(d_p,h_p,0); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA getDevicePointer Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int registerMappedMemory(void *buffer, void **dp, int size) +{ + //cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped); + if (err == cudaSuccess) err = cudaHostGetDevicePointer(dp,buffer,0); + + if (err == cudaSuccess) + { + err = cudaHostGetDevicePointer(dp,buffer,0); + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + } + else + { + fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int allocMappedMemory(void **buffer, void **dp, int size) +{ + cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA allocMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int unregisterMappedMemory(void *buffer) +{ + //cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + cudaError_t err = cudaHostUnregister(buffer); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA unregisterMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int writeRemoteBuffer(void* hostSrc, void* buffer, int count) +{ + cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice); + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error writeRemoteBuffer: %s %p %p %d\n", + cudaGetErrorString(err),buffer, hostSrc, count); + return SPGPU_UNSPECIFIED; + } +} + +int readRemoteBuffer(void* hostDest, void* buffer, int count) +{ + + + cudaError_t err1; + cudaError_t err; +#if 0 + { + err1 =cudaGetLastError(); + fprintf(stderr,"CUDA Error prior to readRemoteBuffer: %s %d\n", + cudaGetErrorString(err1),err1); + } + +#endif + err = cudaMemcpy(hostDest, buffer, count, cudaMemcpyDeviceToHost); + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error readRemoteBuffer: %s %p %p %d %d\n", + cudaGetErrorString(err),hostDest,buffer,count,err); + return SPGPU_UNSPECIFIED; + } +} + +int freeRemoteBuffer(void* buffer) +{ + cudaError_t err = cudaFree(buffer); + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error freeRemoteBuffer: %s %p\n", cudaGetErrorString(err),buffer); + return SPGPU_UNSPECIFIED; + } +} + +int gpuInit(int dev) +{ + + int count,err; + + if ((err=cudaSetDeviceFlags(cudaDeviceMapHost))!=cudaSuccess) + fprintf(stderr,"Error On SetDeviceFlags: %d '%s'\n",err,cudaGetErrorString(err)); + if ((prop=(struct cudaDeviceProp *) malloc(sizeof(struct cudaDeviceProp)))==NULL) { + fprintf(stderr,"CUDA Error gpuInit3: not malloced prop\n"); + return SPGPU_UNSPECIFIED; + } + err = setDevice(dev); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + if (!psb_cublas_handle) + psb_gpuCreateCublasHandle(); + hasUVA=getDeviceHasUVA(); + + return err; + +} + +void gpuClose() +{ + cudaStream_t st1, st2; + if (! psb_gpu_handle) + st1=spgpuGetStream(psb_gpu_handle); + if (! psb_cublas_handle) + cublasGetStream(psb_cublas_handle,&st2); + + psb_gpuDestroyHandle(); + if (st1 != st2) + psb_gpuDestroyCublasHandle(); + free(prop); + prop=NULL; + hasUVA=-1; +} + + +int setDevice(int dev) +{ + int count,err,idev; + + err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error setDevice: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + + if ((0<=dev)&&(devunifiedAddressing; + return(count); +} + +int getGPUMultiProcessors() +{ int count=0; + if (prop!=NULL) + count = prop->multiProcessorCount; + return(count); +} + + +int getGPUMemoryBusWidth() +{ int count=0; +#if CUDART_VERSION >= 5000 + if (prop!=NULL) + count = prop->memoryBusWidth; +#endif + return(count); +} +int getGPUMemoryClockRate() +{ int count=0; +#if CUDART_VERSION >= 5000 + if (prop!=NULL) + count = prop->memoryClockRate; +#endif + return(count); +} +int getGPUWarpSize() +{ int count=0; + if (prop!=NULL) + count = prop->warpSize; + return(count); +} +int getGPUMaxThreadsPerBlock() +{ int count=0; + if (prop!=NULL) + count = prop->maxThreadsPerBlock; + return(count); +} +int getGPUMaxThreadsPerMP() +{ int count=0; + if (prop!=NULL) + count = prop->maxThreadsPerMultiProcessor; + return(count); +} +int getGPUMaxRegistersPerBlock() +{ int count=0; + if (prop!=NULL) + count = prop->regsPerBlock; + return(count); +} + +void cpyGPUNameString(char *cstring) +{ + *cstring='\0'; + if (prop!=NULL) + strcpy(cstring,prop->name); + +} + +int DeviceHasUVA() +{ + return(hasUVA == 1); +} + + +int getDeviceCount() +{ int count; + cudaError_t err; + err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error getDeviceCount: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + return(count); +} + +void cudaSync() +{ + cudaError_t err; + err = cudaDeviceSynchronize(); + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } +} + +void cudaReset() +{ + cudaError_t err; + err = cudaDeviceReset(); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } +} + + +spgpuHandle_t psb_gpuGetHandle() +{ + return psb_gpu_handle; +} + +void psb_gpuCreateHandle() +{ + if (!psb_gpu_handle) + spgpuCreate(&psb_gpu_handle, getDevice()); + +} + +void psb_gpuDestroyHandle() +{ + if (!psb_gpu_handle) + spgpuDestroy(psb_gpu_handle); + psb_gpu_handle = NULL; +} + +cudaStream_t psb_gpuGetStream() +{ + return spgpuGetStream(psb_gpu_handle); +} + +void psb_gpuSetStream(cudaStream_t stream) +{ + spgpuSetStream(psb_gpu_handle, stream); + return ; +} + + + +cublasHandle_t psb_gpuGetCublasHandle() +{ + if (!psb_cublas_handle) + psb_gpuCreateCublasHandle(); + return psb_cublas_handle; +} +void psb_gpuCreateCublasHandle() +{ if (!psb_cublas_handle) + cublasCreate(&psb_cublas_handle); +} +void psb_gpuDestroyCublasHandle() +{ + if (!psb_cublas_handle) + cublasDestroy(psb_cublas_handle); + psb_cublas_handle=NULL; +} + + + + + +/* Simple memory tools */ + +int allocateInt(void **d_int, int n) +{ + return allocRemoteBuffer((void **)(d_int), n*sizeof(int)); +} + +int writeInt(void *d_int, int* h_int, int n) +{ + int i,j; + int *di; + i = writeRemoteBuffer((void*)h_int, (void*)d_int, n*sizeof(int)); + return i; +} + +int readInt(void* d_int, int* h_int, int n) +{ int i; + i = readRemoteBuffer((void *) h_int, (void *) d_int, n*sizeof(int)); + //cudaSync(); + return(i); +} + +int writeIntFirst(int first, void *d_int, int* h_int, int n, int IndexBase) +{ + int i,j; + int *di=(int *) d_int; + di = &(di[first-IndexBase]); + i = writeRemoteBuffer((void*)h_int, (void*)di, n*sizeof(int)); + return i; +} + +int readIntFirst(int first,void* d_int, int* h_int, int n, int IndexBase) +{ int i; + int *di=(int *) d_int; + di = &(di[first-IndexBase]); + i = readRemoteBuffer((void *) h_int, (void *) di, n*sizeof(int)); + //cudaSync(); + return(i); +} + +int allocateMultiInt(void **d_int, int m, int n) +{ + return allocRemoteBuffer((void **)(d_int), m*n*sizeof(int)); +} + +int writeMultiInt(void *d_int, int* h_int, int m, int n) +{ + int i,j; + int *di; + i = writeRemoteBuffer((void*)h_int, (void*)d_int, m*n*sizeof(int)); + return i; +} + +int readMultiInt(void* d_int, int* h_int, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_int, (void *) d_int, m*n*sizeof(int)); + //cudaSync(); + return(i); +} + +void freeInt(void *d_int) +{ + //printf("Before freeInt\n"); + freeRemoteBuffer(d_int); +} + + + + +int allocateFloat(void **d_float, int n) +{ + return allocRemoteBuffer((void **)(d_float), n*sizeof(float)); +} + +int writeFloat(void *d_float, float* h_float, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_float, (void*)d_float, n*sizeof(float)); + + return i; +} + +int readFloat(void* d_float, float* h_float, int n) +{ int i; + i = readRemoteBuffer((void *) h_float, (void *) d_float, n*sizeof(float)); + + return(i); +} + +int writeFloatFirst(int df, void *d_float, float* h_float, int n, int IndexBase) +{ + int i; + + float *dv=(float *) d_float; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_float, (void*)dv, n*sizeof(float)); + + return i; +} + +int readFloatFirst(int df, void* d_float, float* h_float, int n, int IndexBase) +{ int i; + float *dv=(float *) d_float; + dv = &dv[df-IndexBase]; + //fprintf(stderr,"readFloatFirst: %d %p %p %p %d \n",df,d_float,dv,h_float,n); + i = readRemoteBuffer((void *) h_float, (void *) dv, n*sizeof(float)); + + return(i); +} + + +int allocateMultiFloat(void **d_float, int m, int n) +{ + return allocRemoteBuffer((void **)(d_float), m*n*sizeof(float)); +} + +int writeMultiFloat(void *d_float, float* h_float, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_float, (void*)d_float, m*n*sizeof(float)); + return i; +} + +int readMultiFloat(void* d_float, float* h_float, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_float, (void *) d_float, m*n*sizeof(float)); + //cudaSync(); + return(i); +} + +void freeFloat(void *d_float) +{ + freeRemoteBuffer(d_float); +} + + + +int allocateDouble(void **d_double, int n) +{ + return allocRemoteBuffer((void **)(d_double), n*sizeof(double)); +} + +int writeDouble(void *d_double, double* h_double, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_double, (void*)d_double, n*sizeof(double)); + + return i; +} + +int readDouble(void* d_double, double* h_double, int n) +{ int i; + i = readRemoteBuffer((void *) h_double, (void *) d_double, n*sizeof(double)); + + return(i); +} + +int writeDoubleFirst(int df, void *d_double, double* h_double, int n, int IndexBase) +{ + int i; + + double *dv=(double *) d_double; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_double, (void*)dv, n*sizeof(double)); + + return i; +} + +int readDoubleFirst(int df, void* d_double, double* h_double, int n, int IndexBase) +{ int i; + double *dv=(double *) d_double; + dv = &dv[df-IndexBase]; + //fprintf(stderr,"readDoubleFirst: %d %p %p %p %d \n",df,d_double,dv,h_double,n); + i = readRemoteBuffer((void *) h_double, (void *) dv, n*sizeof(double)); + + return(i); +} + +int allocateMultiDouble(void **d_double, int m, int n) +{ + return allocRemoteBuffer((void **)(d_double), m*n*sizeof(double)); +} + +int writeMultiDouble(void *d_double, double* h_double, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_double, (void*)d_double, m*n*sizeof(double)); + return i; +} + +int readMultiDouble(void* d_double, double* h_double, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_double, (void *) d_double, m*n*sizeof(double)); + //cudaSync(); + return(i); +} + +void freeDouble(void *d_double) +{ + freeRemoteBuffer(d_double); +} + + + +int allocateFloatComplex(void **d_FloatComplex, int n) +{ + return allocRemoteBuffer((void **)(d_FloatComplex), n*sizeof(cuFloatComplex)); +} + +int writeFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, n*sizeof(cuFloatComplex)); + + return i; +} + +int readFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int n) +{ int i; + i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, n*sizeof(cuFloatComplex)); + + return(i); +} + +int allocateMultiFloatComplex(void **d_FloatComplex, int m, int n) +{ + return allocRemoteBuffer((void **)(d_FloatComplex), m*n*sizeof(cuFloatComplex)); +} + +int writeMultiFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, m*n*sizeof(cuFloatComplex)); + return i; +} + +int readMultiFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, m*n*sizeof(cuFloatComplex)); + //cudaSync(); + return(i); +} + +int writeFloatComplexFirst(int df, void *d_floatComplex, + cuFloatComplex* h_floatComplex, int n, int IndexBase) +{ + int i; + + cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_floatComplex, (void*)dv, n*sizeof(cuFloatComplex)); + + return i; +} + +int readFloatComplexFirst(int df, void* d_floatComplex, cuFloatComplex* h_floatComplex, + int n, int IndexBase) +{ int i; + cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex; + dv = &dv[df-IndexBase]; + i = readRemoteBuffer((void *) h_floatComplex, (void *) dv, n*sizeof(cuFloatComplex)); + + return(i); +} + +void freeFloatComplex(void *d_FloatComplex) +{ + freeRemoteBuffer(d_FloatComplex); +} + + + + +int allocateDoubleComplex(void **d_DoubleComplex, int n) +{ + return allocRemoteBuffer((void **)(d_DoubleComplex), n*sizeof(cuDoubleComplex)); +} + +int writeDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, n*sizeof(cuDoubleComplex)); + + return i; +} + +int readDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n) +{ int i; + i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, n*sizeof(cuDoubleComplex)); + + return(i); +} + +int writeDoubleComplexFirst(int df, void *d_doubleComplex, + cuDoubleComplex* h_doubleComplex, int n, int IndexBase) +{ + int i; + + cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_doubleComplex, (void*)dv, n*sizeof(cuDoubleComplex)); + + return i; +} + +int readDoubleComplexFirst(int df, void* d_doubleComplex, cuDoubleComplex* h_doubleComplex, + int n, int IndexBase) +{ int i; + cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex; + dv = &dv[df-IndexBase]; + i = readRemoteBuffer((void *) h_doubleComplex, (void *) dv, n*sizeof(cuDoubleComplex)); + + return(i); +} + +int allocateMultiDoubleComplex(void **d_DoubleComplex, int m, int n) +{ + return allocRemoteBuffer((void **)(d_DoubleComplex), m*n*sizeof(cuDoubleComplex)); +} + +int writeMultiDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, m*n*sizeof(cuDoubleComplex)); + return i; +} + +int readMultiDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, m*n*sizeof(cuDoubleComplex)); + //cudaSync(); + return(i); +} + +void freeDoubleComplex(void *d_DoubleComplex) +{ + freeRemoteBuffer(d_DoubleComplex); +} + + + +double etime() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + + + + +#endif diff --git a/gpu/cuda_util.h b/gpu/cuda_util.h new file mode 100644 index 00000000..03c7b488 --- /dev/null +++ b/gpu/cuda_util.h @@ -0,0 +1,139 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _CUDA_UTIL_H_ +#define _CUDA_UTIL_H_ + +#include +#include +#include +#include + +#if defined(HAVE_CUDA) +#include "cuda_runtime.h" +#include "core.h" +#include "cuComplex.h" +#include "fcusparse.h" +#include "cublas_v2.h" + +int allocRemoteBuffer(void** buffer, int count); +int allocMappedMemory(void **buffer, void **dp, int size); +int registerMappedMemory(void *buffer, void **dp, int size); +int unregisterMappedMemory(void *buffer); +int writeRemoteBuffer(void* hostSrc, void* buffer, int count); +int readRemoteBuffer(void* hostDest, void* buffer, int count); +int freeRemoteBuffer(void* buffer); +int gpuInit(int dev); +int getDeviceCount(); +int getDevice(); +int setDevice(int dev); +int getGPUMultiProcessors(); +int getGPUMemoryBusWidth(); +int getGPUMemoryClockRate(); +int getGPUWarpSize(); +int getGPUMaxThreadsPerBlock(); +int getGPUMaxThreadsPerMP(); +int getGPUMaxRegistersPerBlock(); +void cpyGPUNameString(char *cstring); + + +void cudaSync(); +void cudaReset(); +void gpuClose(); + + +spgpuHandle_t psb_gpuGetHandle(); +void psb_gpuCreateHandle(); +void psb_gpuDestroyHandle(); +cudaStream_t psb_gpuGetStream(); +void psb_gpuSetStream(cudaStream_t stream); + +cublasHandle_t psb_gpuGetCublasHandle(); +void psb_gpuCreateCublasHandle(); +void psb_gpuDestroyCublasHandle(); + + +int allocateInt(void **, int); +int allocateMultiInt(void **, int, int); +int writeInt(void *, int *, int); +int writeMultiInt(void *, int* , int , int ); +int readInt(void *, int *, int); +int readMultiInt(void*, int*, int, int ); +int writeIntFirst(int,void *, int *, int,int); +int readIntFirst(int,void *, int *, int,int); +void freeInt(void *); + +int allocateFloat(void **, int); +int allocateMultiFloat(void **, int, int); +int writeFloat(void *, float *, int); +int writeMultiFloat(void *, float* , int , int ); +int readFloat(void *, float*, int); +int readMultiFloat(void*, float*, int, int ); +int writeFloatFirst(int, void *, float*, int, int); +int readFloatFirst(int, void *, float*, int, int); +void freeFloat(void *); + +int allocateDouble(void **, int); +int allocateMultiDouble(void **, int, int); +int writeDouble(void *, double*, int); +int writeMultiDouble(void *, double* , int , int ); +int readDouble(void *, double*, int); +int readMultiDouble(void*, double*, int, int ); +int writeDoubleFirst(int, void *, double*, int, int); +int readDoubleFirst(int, void *, double*, int, int); +void freeDouble(void *); + +int allocateFloatComplex(void **, int); +int allocateMultiFloatComplex(void **, int, int); +int writeFloatComplex(void *, cuFloatComplex*, int); +int writeMultiFloatComplex(void *, cuFloatComplex* , int , int ); +int readFloatComplex(void *, cuFloatComplex*, int); +int readMultiFloatComplex(void*, cuFloatComplex*, int, int ); +int writeFloatComplexFirst(int, void *, cuFloatComplex*, int, int); +int readFloatComplexFirst(int, void *, cuFloatComplex*, int, int); +void freeFloatComplex(void *); + +int allocateDoubleComplex(void **, int); +int allocateMultiDoubleComplex(void **, int, int); +int writeDoubleComplex(void *, cuDoubleComplex*, int); +int writeMultiDoubleComplex(void *, cuDoubleComplex* , int , int ); +int readDoubleComplex(void *, cuDoubleComplex*, int); +int readMultiDoubleComplex(void*, cuDoubleComplex*, int, int ); +int writeDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int); +int readDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int); +void freeDoubleComplex(void *); + +double etime(); + +#endif + +#endif diff --git a/gpu/cusparse_mod.F90 b/gpu/cusparse_mod.F90 new file mode 100644 index 00000000..4ae16cff --- /dev/null +++ b/gpu/cusparse_mod.F90 @@ -0,0 +1,38 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module cusparse_mod + use base_cusparse_mod + use s_cusparse_mod + use d_cusparse_mod + use c_cusparse_mod + use z_cusparse_mod +end module cusparse_mod diff --git a/gpu/cvectordev.c b/gpu/cvectordev.c new file mode 100644 index 00000000..db55caef --- /dev/null +++ b/gpu/cvectordev.c @@ -0,0 +1,325 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "cvectordev.h" + + +int registerMappedFloatComplex(void *buff, void **d_p, int n, cuFloatComplex dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(cuFloatComplex)); +} + +int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuFloatComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceFloatComplex(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatComplexR2",i); + } + return(i); +} + +int readMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuFloatComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i); + } + return(i); +} + +int readMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld) +{ int i; + i = readMultiVecDeviceFloatComplex(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatComplexR2",i); + } + return(i); +} + +int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + cuFloatComplex beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = make_cuFloatComplex(0.0, 0.0); + else if (dupl == INS_ADD) + beta = make_cuFloatComplex(1.0, 0.0); + else + beta = make_cuFloatComplex(0.0, 0.0); + + spgpuCscat(handle, (cuFloatComplex *) devVecX->v_, n, (cuFloatComplex*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceFloatComplex(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + cuFloatComplex *hv = (cuFloatComplex *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuCgath(handle,hv, n, idx,indexBase, + (cuFloatComplex *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void *deviceIdx, + int hfirst, void* host_values, + int indexBase, cuFloatComplex beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceFloatComplex(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, + indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuFloatComplex beta) +{ int i=0; + cuFloatComplex *hv = (cuFloatComplex *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuCscat(handle, (cuFloatComplex *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_, + alpha, (cuFloatComplex *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, + void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_, + (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVecX, + cuFloatComplex beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuCaxpby(handle,(cuFloatComplex*)devVecY->v_+pitch*j, n, beta, + (cuFloatComplex*)devVecY->v_+pitch*j, alpha, + (cuFloatComplex*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, + void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCmaxy(handle, (cuFloatComplex*)devVecB->v_, n, alpha, + (cuFloatComplex*)devVecA->v_, + (cuFloatComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, + void *deviceVecB, cuFloatComplex beta, + void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuCmaxypbz(handle, (cuFloatComplex*)devVecZ->v_, n, beta, + (cuFloatComplex*)devVecZ->v_, + alpha, (cuFloatComplex*) devVecA->v_, (cuFloatComplex*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + + +int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCabs(handle, (cuFloatComplex*)devVecB->v_, n, + alpha, (cuFloatComplex*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_gpuGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuCabs(handle, (cuFloatComplex*)devVecA->v_, n, + alpha, (cuFloatComplex*)devVecA->v_); + + return(i); +} + +#endif + diff --git a/gpu/cvectordev.h b/gpu/cvectordev.h new file mode 100644 index 00000000..f58fcca7 --- /dev/null +++ b/gpu/cvectordev.h @@ -0,0 +1,81 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedFloatComplex(void *, void **, int, cuFloatComplex); +int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec); +int writeMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld); +int readMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec); +int readMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld); + +int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceFloatComplex(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, cuFloatComplex beta); +int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, cuFloatComplex beta); + +int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA); +int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); +int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); +int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); +int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY); +int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, + void *deviceVecB, cuFloatComplex beta, void *deviceVecZ); +int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA); +int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, + void *deviceVecA, void *deviceVecB); + + +#endif diff --git a/gpu/d_cusparse_mod.F90 b/gpu/d_cusparse_mod.F90 new file mode 100644 index 00000000..cd8bd52f --- /dev/null +++ b/gpu/d_cusparse_mod.F90 @@ -0,0 +1,305 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module d_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: d_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type d_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: d_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type d_Hmat +#endif + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface CSRGDeviceFree + function d_CSRGDeviceFree(Mat) & + & bind(c,name="d_CSRGDeviceFree") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function d_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function d_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function d_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function d_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatIndexBase + end interface + + interface CSRGDeviceCsrsmAnalysis + function d_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGDeviceCsrsmAnalysis + end interface + + interface CSRGDeviceAlloc + function d_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="d_CSRGDeviceAlloc") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function d_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function d_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="d_CSRGDeviceGetParms") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function d_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function d_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spsvCSRGDevice") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function d_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spmvCSRGDevice") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function d_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="d_CSRGHost2Device") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function d_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="d_CSRGDevice2Host") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface HYBGDeviceAlloc + function d_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="d_HYBGDeviceAlloc") result(res) + use iso_c_binding + import d_hmat + type(d_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function d_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function d_HYBGDeviceFree(Mat) & + & bind(c,name="d_HYBGDeviceFree") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int) :: res + end function d_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function d_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function d_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function d_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function d_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function d_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="d_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int) :: res + end function d_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function d_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spsvHYBGDevice") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function d_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spmvHYBGDevice") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function d_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="d_HYBGHost2Device") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_HYBGHost2Device + end interface +#endif + +#endif + +end module d_cusparse_mod diff --git a/gpu/dcusparse.c b/gpu/dcusparse.c new file mode 100644 index 00000000..9659c1f9 --- /dev/null +++ b/gpu/dcusparse.c @@ -0,0 +1,95 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE double +#define CUSPARSE_BASE_TYPE CUDA_R_64F +#define T_CSRGDeviceMat d_CSRGDeviceMat +#define T_Cmat d_Cmat +#define T_spmvCSRGDevice d_spmvCSRGDevice +#define T_spsvCSRGDevice d_spsvCSRGDevice +#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc +#define T_CSRGDeviceFree d_CSRGDeviceFree +#define T_CSRGHost2Device d_CSRGHost2Device +#define T_CSRGDevice2Host d_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseDcsrmv +#define cusparseTcsrsv_solve cusparseDcsrsv_solve +#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis +#define T_HYBGDeviceMat d_HYBGDeviceMat +#define T_Hmat d_Hmat +#define T_HYBGDeviceFree d_HYBGDeviceFree +#define T_spmvHYBGDevice d_spmvHYBGDevice +#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice d_spsvHYBGDevice +#define T_HYBGHost2Device d_HYBGHost2Device +#define cusparseThybmv cusparseDhybmv +#define cusparseThybsv_solve cusparseDhybsv_solve +#define cusparseThybsv_analysis cusparseDhybsv_analysis +#define cusparseTcsr2hyb cusparseDcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve + +#endif + +#include "fcusparse_fct.h" + +#endif diff --git a/gpu/diagdev.c b/gpu/diagdev.c new file mode 100644 index 00000000..64879455 --- /dev/null +++ b/gpu/diagdev.c @@ -0,0 +1,291 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "diagdev.h" +#include +#include +#include +#include +#if defined(HAVE_SPGPU) +//new +DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, unsigned int diags, unsigned int elementType) +{ + DiagDeviceParams params; + + params.elementType = elementType; + //numero di elementi di val + params.rows = rows; + params.columns = columns; + params.diags = diags; + + return params; + +} +//new +int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params) +{ + struct DiagDevice *tmp = (struct DiagDevice *)malloc(sizeof(struct DiagDevice)); + int ret=SPGPU_SUCCESS; + *remoteMatrix = (void *)tmp; + + tmp->rows = params->rows; + + tmp->cols = params->columns; + + tmp->diags = params->diags; + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->off), tmp->diags*sizeof(int)); + + /* tmp->baseIndex = params->firstIndex; */ + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +void freeDiagDevice(void* remoteMatrix) +{ + struct DiagDevice *devMat = (struct DiagDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->off); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,unsigned int diags,unsigned int elementType) +{ int i; +#ifdef HAVE_SPGPU + DiagDeviceParams p; + + p = getDiagDeviceParams(rows, columns, diags,elementType); + i = allocDiagDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n) +{ int i,fo,fa; + char buf_a[255], buf_o[255],tmp[255]; +#ifdef HAVE_SPGPU + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + // Ex updateFromHost function + /* memset(buf_a,'\0',255); */ + /* memset(buf_o,'\0',255); */ + /* memset(tmp,'\0',255); */ + + /* strcat(buf_a,"mat_"); */ + /* strcat(buf_o,"off_"); */ + /* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */ + /* strcat(buf_a,tmp); */ + /* memset(tmp,'\0',255); */ + /* sprintf(tmp,"%d.dat",devMat->cols); */ + /* strcat(buf_o,tmp); */ + + /* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + /* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + + /* i = write(fa, a, sizeof(double)*devMat->cols*devMat->rows); */ + /* i = write(fo, off, sizeof(int)*devMat->cols); */ + + /* close(fa); */ + /* close(fo); */ + + i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(double)); + i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int)); + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readDiagDeviceDouble(void* deviceMat, double* a, int* off) +{ int i; +#ifdef HAVE_SPGPU + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); + i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +//new +int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY) +{ + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /* spgpuDdiagspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */ + + spgpuDdiaspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n) +{ int i,fo,fa; + char buf_a[255], buf_o[255],tmp[255]; +#ifdef HAVE_SPGPU + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + // Ex updateFromHost function + /* memset(buf_a,'\0',255); */ + /* memset(buf_o,'\0',255); */ + /* memset(tmp,'\0',255); */ + + /* strcat(buf_a,"mat_"); */ + /* strcat(buf_o,"off_"); */ + /* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */ + /* strcat(buf_a,tmp); */ + /* memset(tmp,'\0',255); */ + /* sprintf(tmp,"%d.dat",devMat->cols); */ + /* strcat(buf_o,tmp); */ + + /* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + /* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + + /* i = write(fa, a, sizeof(float)*devMat->cols*devMat->rows); */ + /* i = write(fo, off, sizeof(int)*devMat->cols); */ + + /* close(fa); */ + /* close(fo); */ + + i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(float)); + i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int)); + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readDiagDeviceFloat(void* deviceMat, float* a, int* off) +{ int i; +#ifdef HAVE_SPGPU + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); + i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +//new +int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /* spgpuDdiagspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */ + + spgpuSdiaspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +#endif diff --git a/gpu/diagdev.h b/gpu/diagdev.h new file mode 100644 index 00000000..83f38289 --- /dev/null +++ b/gpu/diagdev.h @@ -0,0 +1,95 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _DIAGDEV_H_ +#define _DIAGDEV_H_ + +#ifdef HAVE_SPGPU +#include "cintrf.h" +#include "dia.h" + +struct DiagDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // offset (same size of cM) + int *off; + + int rows; + + int cols; + + int diags; + +}; + +typedef struct DiagDeviceParams +{ + + unsigned int elementType; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + unsigned int diags; + +} DiagDeviceParams; +DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int elementType, unsigned int firstIndex); +int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int elementType, unsigned int firstIndex); +int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params); +void freeDiagDevice(void* remoteMatrix); + +int readDiagDeviceDouble(void* deviceMat, double* a, int* off); +int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n); +int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); + +int readDiagDeviceFloat(void* deviceMat, float* a, int* off); +int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n); +int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); + + + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/diagdev_mod.F90 b/gpu/diagdev_mod.F90 new file mode 100644 index 00000000..cbcc029e --- /dev/null +++ b/gpu/diagdev_mod.F90 @@ -0,0 +1,231 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module diagdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: diagdev_parms + integer(c_int) :: element_type + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: firstIndex + end type diagdev_parms + +#ifdef HAVE_SPGPU + + interface + function FgetDiagDeviceParams(rows, columns, elementType, firstIndex) & + & result(res) bind(c,name='getDiagDeviceParams') + use iso_c_binding + import :: diagdev_parms + type(diagdev_parms) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + end function FgetDiagDeviceParams + end interface + + + interface + function FallocDiagDevice(deviceMat,rows,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocDiagDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocDiagDevice + end interface + + + interface writeDiagDevice + + function writeDiagDeviceFloat(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + real(c_float) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function writeDiagDeviceFloat + + function writeDiagDeviceDouble(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int),value :: n + real(c_double) :: a(n,*) + integer(c_int) :: off(*) + end function writeDiagDeviceDouble + + function writeDiagDeviceFloatComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_float_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function writeDiagDeviceFloatComplex + + function writeDiagDeviceDoubleComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_double_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function writeDiagDeviceDoubleComplex + + end interface + + interface readDiagDevice + + function readDiagDeviceFloat(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function readDiagDeviceFloat + + function readDiagDeviceDouble(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int),value :: n + real(c_double) :: a(n,*) + integer(c_int) :: off(*) + end function readDiagDeviceDouble + + function readDiagDeviceFloatComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_float_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function readDiagDeviceFloatComplex + + function readDiagDeviceDoubleComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_double_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function readDiagDeviceDoubleComplex + + end interface + + interface + subroutine freeDiagDevice(deviceMat) & + & bind(c,name='freeDiagDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeDiagDevice + end interface + + interface + subroutine resetDiagTimer() bind(c,name='resetDiagTimer') + use iso_c_binding + end subroutine resetDiagTimer + end interface + interface + function getDiagTimer() & + & bind(c,name='getDiagTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getDiagTimer + end interface + + + interface + function getDiagDevicePitch(deviceMat) & + & bind(c,name='getDiagDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDiagDevicePitch + end interface + + interface + function getDiagDeviceMaxRowSize(deviceMat) & + & bind(c,name='getDiagDeviceMaxRowSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDiagDeviceMaxRowSize + end interface + + + interface spmvDiagDevice + function spmvDiagDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvDiagDeviceFloat + function spmvDiagDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvDiagDeviceDouble + function spmvDiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvDiagDeviceFloatComplex + function spmvDiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvDiagDeviceDoubleComplex + end interface spmvDiagDevice + +#endif + + +end module diagdev_mod diff --git a/gpu/dnsdev.c b/gpu/dnsdev.c new file mode 100644 index 00000000..fb4d339c --- /dev/null +++ b/gpu/dnsdev.c @@ -0,0 +1,383 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#include "dnsdev.h" + +#if defined(HAVE_SPGPU) + +#define PASS_RS 0 + +#define IMIN(a,b) ((a)<(b) ? (a) : (b)) + +DnsDeviceParams getDnsDeviceParams(unsigned int rows, unsigned int columns, + unsigned int elementType, unsigned int firstIndex) +{ + DnsDeviceParams params; + + if (elementType == SPGPU_TYPE_DOUBLE) + { + params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D; + } + else + { + params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S; + } + //For complex? + params.elementType = elementType; + params.rows = rows; + params.columns = columns; + params.firstIndex = firstIndex; + + return params; + +} +//new +int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params) +{ + struct DnsDevice *tmp = (struct DnsDevice *)malloc(sizeof(struct DnsDevice)); + *remoteMatrix = (void *)tmp; + tmp->rows = params->rows; + tmp->columns = params->columns; + tmp->cMPitch = params->pitch; + tmp->pitch= tmp->cMPitch; + tmp->allocsize = (int)tmp->columns * tmp->pitch; + tmp->baseIndex = params->firstIndex; + //fprintf(stderr,"allocDnsDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize); + if (params->elementType == SPGPU_TYPE_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + else if (params->elementType == SPGPU_TYPE_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + else + return SPGPU_UNSUPPORTED; // Unsupported params + //fprintf(stderr,"From allocDnsDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return SPGPU_SUCCESS; +} + +void freeDnsDevice(void* remoteMatrix) +{ + struct DnsDevice *devMat = (struct DnsDevice *) remoteMatrix; + //fprintf(stderr,"freeDnsDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocDnsDevice(void** deviceMat, unsigned int rows, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ int i; +#ifdef HAVE_SPGPU + DnsDeviceParams p; + + p = getDnsDeviceParams(rows, columns, elementType, firstIndex); + i = allocDnsDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocDnsDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, + void *deviceMat, void* deviceX, float *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; +#ifdef HAVE_SPGPU + + cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasSgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasSgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, + void *deviceMat, void* deviceX, double *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; +#ifdef HAVE_SPGPU + + cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasDgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasDgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *alpha, + void *deviceMat, void* deviceX, float complex *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; +#ifdef HAVE_SPGPU + + cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasCgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex *alpha, + void *deviceMat, void* deviceX, double complex *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; +#ifdef HAVE_SPGPU + + cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasZgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloat",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloat",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int getDnsDevicePitch(void* deviceMat) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; +#ifdef HAVE_SPGPU + i = devMat->pitch; + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +#endif + diff --git a/gpu/dnsdev.h b/gpu/dnsdev.h new file mode 100644 index 00000000..7c8b06c9 --- /dev/null +++ b/gpu/dnsdev.h @@ -0,0 +1,122 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _DNSDEV_H_ +#define _DNSDEV_H_ + +#if defined(HAVE_SPGPU) +#include "cintrf.h" +#include "cuComplex.h" +#include "cublas_v2.h" + + +struct DnsDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + + //matrix size (uncompressed) + int rows; + int columns; + + int pitch; //old + + int cMPitch; + + //allocation size (in elements) + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; +}; + +typedef struct DnsDeviceParams +{ + // The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + // First index (e.g 0 or 1) + unsigned int firstIndex; +} DnsDeviceParams; + +int FallocDnsDevice(void** deviceMat, unsigned int rows, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex); +int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params); +void freeDnsDevice(void* remoteMatrix); + +int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc); +int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc); +int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc); +int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc); + +int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc); +int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc); +int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc); +int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc); + +int spmvDnsDeviceFloat(char transa, int m, int n, int k, + float *alpha, void *deviceMat, void* deviceX, + float *beta, void* deviceY); +int spmvDnsDeviceDouble(char transa, int m, int n, int k, + double *alpha, void *deviceMat, void* deviceX, + double *beta, void* deviceY); +int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, + float complex *alpha, void *deviceMat, void* deviceX, + float complex *beta, void* deviceY); +int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, + double complex *alpha, void *deviceMat, void* deviceX, + double complex *beta, void* deviceY); + +int getDnsDevicePitch(void* deviceMat); + +// sparse Dns matrix-vector product +//int spmvDnsDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); +//int spmvDnsDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/dnsdev_mod.F90 b/gpu/dnsdev_mod.F90 new file mode 100644 index 00000000..8b96b918 --- /dev/null +++ b/gpu/dnsdev_mod.F90 @@ -0,0 +1,275 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module dnsdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: dnsdev_parms + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: maxRowSize + integer(c_int) :: avgRowSize + integer(c_int) :: firstIndex + end type dnsdev_parms + +#ifdef HAVE_SPGPU + + interface + function FgetDnsDeviceParams(rows, columns, elementType, firstIndex) & + & result(res) bind(c,name='getDnsDeviceParams') + use iso_c_binding + import :: dnsdev_parms + type(dnsdev_parms) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + end function FgetDnsDeviceParams + end interface + + + interface + function FallocDnsDevice(deviceMat,rows,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocDnsDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocDnsDevice + end interface + + + interface writeDnsDevice + + function writeDnsDeviceFloat(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_float) :: val(lda,*) + end function writeDnsDeviceFloat + + + function writeDnsDeviceDouble(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_double) :: val(lda,*) + end function writeDnsDeviceDouble + + + function writeDnsDeviceFloatComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceFloatComple') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_float_complex) :: val(lda,*) + end function writeDnsDeviceFloatComplex + + + function writeDnsDeviceDoubleComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_double_complex) :: val(lda,*) + end function writeDnsDeviceDoubleComplex + + end interface + + interface readDnsDevice + + function readDnsDeviceFloat(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_float) :: val(lda,*) + end function readDnsDeviceFloat + + + function readDnsDeviceDouble(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_double) :: val(lda,*) + end function readDnsDeviceDouble + + + function readDnsDeviceFloatComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceFloatComple') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_float_complex) :: val(lda,*) + end function readDnsDeviceFloatComplex + + + function readDnsDeviceDoubleComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_double_complex) :: val(lda,*) + end function readDnsDeviceDoubleComplex + + end interface + + interface + subroutine freeDnsDevice(deviceMat) & + & bind(c,name='freeDnsDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeDnsDevice + end interface + + interface + subroutine resetDnsTimer() bind(c,name='resetDnsTimer') + use iso_c_binding + end subroutine resetDnsTimer + end interface + interface + function getDnsTimer() & + & bind(c,name='getDnsTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getDnsTimer + end interface + + + interface + function getDnsDevicePitch(deviceMat) & + & bind(c,name='getDnsDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDnsDevicePitch + end interface + +!!$ interface csputDnsDeviceFloat +!!$ function dev_csputDnsDeviceFloat(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloat') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceFloat +!!$ end interface +!!$ +!!$ interface csputDnsDeviceDouble +!!$ function dev_csputDnsDeviceDouble(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceDouble') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceDouble +!!$ end interface +!!$ +!!$ interface csputDnsDeviceFloatComplex +!!$ function dev_csputDnsDeviceFloatComplex(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceFloatComplex +!!$ end interface +!!$ +!!$ interface csputDnsDeviceDoubleComplex +!!$ function dev_csputDnsDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceDoubleComplex +!!$ end interface + + interface spmvDnsDevice + function spmvDnsDeviceFloat(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceFloat') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float) :: alpha, beta + end function spmvDnsDeviceFloat + + function spmvDnsDeviceDouble(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceDouble') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double) :: alpha, beta + end function spmvDnsDeviceDouble + + function spmvDnsDeviceFloatComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceFloatComplex') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex) :: alpha, beta + end function spmvDnsDeviceFloatComplex + + function spmvDnsDeviceDoubleComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceDoubleComplex') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex) :: alpha, beta + end function spmvDnsDeviceDoubleComplex + + end interface + +#endif + + +end module dnsdev_mod diff --git a/gpu/dvectordev.c b/gpu/dvectordev.c new file mode 100644 index 00000000..8b020c16 --- /dev/null +++ b/gpu/dvectordev.c @@ -0,0 +1,305 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "dvectordev.h" + + +int registerMappedDouble(void *buff, void **d_p, int n, double dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(double)); +} + +int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(double)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceDouble(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleR2",i); + } + return(i); +} + +int readMultiVecDeviceDouble(void* deviceVec, double* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(double)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDouble",i); + } + return(i); +} + +int readMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld) +{ int i; + i = readMultiVecDeviceDouble(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleR2",i); + } + return(i); +} + +int setscalMultiVecDeviceDouble(double val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_); + + return(i); +} + + +int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + double beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0.0; + else if (dupl == INS_ADD) + beta = 1.0; + else + beta = 0.0; + + spgpuDscat(handle, (double *) devVecX->v_, n, (double*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceDouble(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + double *hv = (double *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuDgath(handle,hv, n, idx,indexBase, (double *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, double beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceDouble(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, double beta) +{ int i=0; + double *hv = (double *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuDscat(handle, (double *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + +int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, + alpha, (double *)devVecA->v_); + return(i); +} + +int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, + double beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta, + (double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDmaxy(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_, + (double*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, + void *deviceVecB, double beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuDmaxypbz(handle, (double*)devVecZ->v_, n, beta, (double*)devVecZ->v_, + alpha, (double*) devVecA->v_, (double*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + +int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDabs(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_gpuGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuDabs(handle, (double*)devVecA->v_, n, alpha, (double*)devVecA->v_); + + return(i); +} + + +#endif + diff --git a/gpu/dvectordev.h b/gpu/dvectordev.h new file mode 100644 index 00000000..960958c5 --- /dev/null +++ b/gpu/dvectordev.h @@ -0,0 +1,78 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedDouble(void *, void **, int, double); +int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); +int writeMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld); +int readMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); +int readMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld); + +int setscalMultiVecDeviceDouble(double val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceDouble(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, double beta); +int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, double beta); + +int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA); +int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int amaxMultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); +int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, + void *deviceVecB, double beta, void *deviceVecZ); +int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA); +int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, void *deviceVecB); + + +#endif diff --git a/gpu/elldev.c b/gpu/elldev.c new file mode 100644 index 00000000..8fd7aeb5 --- /dev/null +++ b/gpu/elldev.c @@ -0,0 +1,773 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#include "elldev.h" + +#if defined(HAVE_SPGPU) + +#define PASS_RS 0 + +EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ + EllDeviceParams params; + + if (elementType == SPGPU_TYPE_DOUBLE) + { + params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D; + } + else + { + params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S; + } + //For complex? + params.elementType = elementType; + + params.rows = rows; + params.maxRowSize = maxRowSize; + params.avgRowSize = (nnzeros+rows-1)/rows; + params.columns = columns; + params.firstIndex = firstIndex; + + //params.pitch = computeEllAllocPitch(rows); + + return params; + +} +//new +int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params) +{ + struct EllDevice *tmp = (struct EllDevice *)malloc(sizeof(struct EllDevice)); + *remoteMatrix = (void *)tmp; + tmp->rows = params->rows; + tmp->cMPitch = computeEllAllocPitch(tmp->rows); + tmp->rPPitch = tmp->cMPitch; + tmp->pitch= tmp->cMPitch; + tmp->maxRowSize = params->maxRowSize; + tmp->avgRowSize = params->avgRowSize; + tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch; + //tmp->allocsize = (int)params->maxRowSize * tmp->cMPitch; + allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int)); + tmp->columns = params->columns; + tmp->baseIndex = params->firstIndex; + tmp->dataType = params->elementType; + //fprintf(stderr,"allocEllDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize); + if (params->elementType == SPGPU_TYPE_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + else if (params->elementType == SPGPU_TYPE_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + else + return SPGPU_UNSUPPORTED; // Unsupported params + //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return SPGPU_SUCCESS; +} + +//new +void zeroEllDevice(void *remoteMatrix) +{ + struct EllDevice *tmp = (struct EllDevice *) remoteMatrix; + + if (tmp->dataType == SPGPU_TYPE_FLOAT) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(float)); + else if (tmp->dataType == SPGPU_TYPE_DOUBLE) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(double)); + else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuFloatComplex)); + else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex)); + else + return SPGPU_UNSUPPORTED; // Unsupported params + //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return; +} + + +void freeEllDevice(void* remoteMatrix) +{ + struct EllDevice *devMat = (struct EllDevice *) remoteMatrix; + //fprintf(stderr,"freeEllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->rS); + freeRemoteBuffer(devMat->rP); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ int i; +#ifdef HAVE_SPGPU + EllDeviceParams p; + + p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex); + i = allocEllDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + +void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS, + int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + + for (i=0; icount_ == x->count_, "ERROR: x and y don't share the same number of vectors"); + __assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)"); + __assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)"); +#endif + /*spgpuSellspmv (handle, (float*) y->v_, (float*)y->v_, alpha, + (float*) devMat->cM, devMat->rP, devMat->cMPitch, + devMat->rPPitch, devMat->rS, devMat->rows, + (float*)x->v_, beta, devMat->baseIndex);*/ + sspmdmm_gpu ( (float *)y->v_,y->count_, y->pitch_, (float *)y->v_, alpha, (float *)devMat->cM, devMat->rP, devMat->rS, + devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, + (float *)x->v_, beta, devMat->baseIndex); + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +void +dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, int* rP, + int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + double *x, double beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + for (i=0; iv_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/ + /* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */ + /* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */ + /* devMat->pitch, y->count_, y->pitch_); */ + dspmdmm_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_, + alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, + devMat->maxRowSize, devMat->rows, devMat->pitch, + (double *)x->v_, beta, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +void +cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y, + cuFloatComplex alpha, cuFloatComplex* cM, + int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + cuFloatComplex *x, cuFloatComplex beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + for (i=0; iv_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, + (cuFloatComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +void +zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* cM, + int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + for (i=0; iv_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, + devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + //i = writeEllDevice(deviceMat, (void *) val, ja, irn); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*i = readEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int getEllDevicePitch(void* deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; +#ifdef HAVE_SPGPU + i = devMat->pitch; //old + //i = getPitchEllDevice(deviceMat); + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int getEllDeviceMaxRowSize(void* deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; +#ifdef HAVE_SPGPU + i = devMat->maxRowSize; + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + + +// New copying interface + +int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float *val, void *deviceMat) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + float *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_gpuGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double *val, void *deviceMat) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + double *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_gpuGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float complex *val, void *deviceMat) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + float complex *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_gpuGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double complex *val, void *deviceMat) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + double complex *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_gpuGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + float alpha=1.0; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuSellcsput(handle,alpha,(float *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1); + +#endif + return SPGPU_SUCCESS; +} + +int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + double alpha=1.0; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuDellcsput(handle,alpha,(double *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1); + +#endif + return SPGPU_SUCCESS; +} + + +int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0); + spgpuHandle_t handle=psb_gpuGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuCellcsput(handle,alpha,(cuFloatComplex *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1); + +#endif + return SPGPU_SUCCESS; +} + +int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0); + spgpuHandle_t handle=psb_gpuGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuZellcsput(handle,alpha,(cuDoubleComplex *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1); + +#endif + return SPGPU_SUCCESS; +} + +#endif + diff --git a/gpu/elldev.h b/gpu/elldev.h new file mode 100644 index 00000000..6a0814e2 --- /dev/null +++ b/gpu/elldev.h @@ -0,0 +1,183 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _ELLDEV_H_ +#define _ELLDEV_H_ + +#if defined(HAVE_SPGPU) +#include "cintrf.h" +#include "cuComplex.h" +#include "ell.h" + + +struct EllDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // row pointers (same size of cM) + int *rP; + int *diag; + // row size + int *rS; + + //matrix size (uncompressed) + int rows; + int columns; + + int pitch; //old + + int cMPitch; + + int rPPitch; + + int maxRowSize; + int avgRowSize; + + //allocation size (in elements) + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; + /* real/complex, single/double */ + int dataType; + +}; + +typedef struct EllDeviceParams +{ + // The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + // Largest row size + unsigned int maxRowSize; + unsigned int avgRowSize; + + // First index (e.g 0 or 1) + unsigned int firstIndex; +} EllDeviceParams; + +int FallocEllDevice(void** deviceMat, unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex); +int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params); +void freeEllDevice(void* remoteMatrix); + +int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag); + +int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag); + +int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); +int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); +int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, + float complex beta, void* deviceY); +int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, + double complex beta, void* deviceY); + + + +int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float *val, void *deviceMat); + +int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double *val, void *deviceMat); + +int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float complex *val, void *deviceMat); + +int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double complex *val, void *deviceMat); + + +void psi_cuda_s_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, float *devVal, + int *idiag, int *rP, float *cM); + +void psi_cuda_d_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, double *devVal, + int *idiag, int *rP, double *cM); + +void psi_cuda_c_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, float complex *devVal, + int *idiag, int *rP, float complex *cM); + +void psi_cuda_z_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, double complex *devVal, + int *idiag, int *rP, double complex *cM); + + +int dev_csputEllDeviceFloat(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceDouble(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val); + +void zeroEllDevice(void* deviceMat); + +int getEllDevicePitch(void* deviceMat); + +// sparse Ell matrix-vector product +//int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); +//int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/elldev_mod.F90 b/gpu/elldev_mod.F90 new file mode 100644 index 00000000..49656d19 --- /dev/null +++ b/gpu/elldev_mod.F90 @@ -0,0 +1,326 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module elldev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: elldev_parms + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: maxRowSize + integer(c_int) :: avgRowSize + integer(c_int) :: firstIndex + end type elldev_parms + +#ifdef HAVE_SPGPU + + interface + function FgetEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex) & + & result(res) bind(c,name='getEllDeviceParams') + use iso_c_binding + import :: elldev_parms + type(elldev_parms) :: res + integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex + end function FgetEllDeviceParams + end interface + + + interface + function FallocEllDevice(deviceMat,rows,maxRowSize,nnzeros,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocEllDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocEllDevice + end interface + + + interface writeEllDevice + + function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_float) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceFloat + + function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_double) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceDouble + + function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_float_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceFloatComplex + + function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_double_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceDoubleComplex + + end interface + + interface readEllDevice + + function readEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_float) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceFloat + + function readEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_double) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceDouble + + function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_float_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceFloatComplex + + function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_double_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceDoubleComplex + + end interface + + interface + subroutine freeEllDevice(deviceMat) & + & bind(c,name='freeEllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeEllDevice + end interface + + interface + subroutine zeroEllDevice(deviceMat) & + & bind(c,name='zeroEllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine zeroEllDevice + end interface + + interface + subroutine resetEllTimer() bind(c,name='resetEllTimer') + use iso_c_binding + end subroutine resetEllTimer + end interface + interface + function getEllTimer() & + & bind(c,name='getEllTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getEllTimer + end interface + + + interface + function getEllDevicePitch(deviceMat) & + & bind(c,name='getEllDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getEllDevicePitch + end interface + + interface + function getEllDeviceMaxRowSize(deviceMat) & + & bind(c,name='getEllDeviceMaxRowSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getEllDeviceMaxRowSize + end interface + + + interface psi_CopyCooToElg + function psiCopyCooToElgFloat(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgFloat + function psiCopyCooToElgDouble(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgDouble + function psiCopyCooToElgFloatComplex(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgFloatComplex + function psiCopyCooToElgDoubleComplex(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgDoubleComplex + end interface + + interface csputEllDeviceFloat + function dev_csputEllDeviceFloat(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceFloat + end interface + + interface csputEllDeviceDouble + function dev_csputEllDeviceDouble(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceDouble + end interface + + interface csputEllDeviceFloatComplex + function dev_csputEllDeviceFloatComplex(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceFloatComplex + end interface + + interface csputEllDeviceDoubleComplex + function dev_csputEllDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceDoubleComplex + end interface + + interface spmvEllDevice + function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvEllDeviceFloat + function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvEllDeviceDouble + function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvEllDeviceFloatComplex + function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvEllDeviceDoubleComplex + end interface + +#endif + + +end module elldev_mod diff --git a/gpu/fcusparse.c b/gpu/fcusparse.c new file mode 100644 index 00000000..5f0c12d9 --- /dev/null +++ b/gpu/fcusparse.c @@ -0,0 +1,77 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include "cintrf.h" +#include "fcusparse.h" + +static cusparseHandle_t *cusparse_handle=NULL; + + +void setHandle(cusparseHandle_t); + +int FcusparseCreate() +{ + int ret=CUSPARSE_STATUS_SUCCESS; + cusparseHandle_t *handle; + if (cusparse_handle == NULL) { + if ((handle = (cusparseHandle_t *)malloc(sizeof(cusparseHandle_t)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + ret = (int)cusparseCreate(handle); + if (ret == CUSPARSE_STATUS_SUCCESS) + cusparse_handle = handle; + } + return (ret); +} + +int FcusparseDestroy() +{ + int val; + val = (int) cusparseDestroy(*cusparse_handle); + free(cusparse_handle); + cusparse_handle=NULL; + return(val); +} +cusparseHandle_t *getHandle() +{ + if (cusparse_handle == NULL) + FcusparseCreate(); + return(cusparse_handle); +} + + + +#endif diff --git a/gpu/fcusparse.h b/gpu/fcusparse.h new file mode 100644 index 00000000..2bab2aca --- /dev/null +++ b/gpu/fcusparse.h @@ -0,0 +1,70 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef FCUSPARSE_ +#define FCUSPARSE_ + +#ifdef HAVE_SPGPU +#include +#if CUDA_SHORT_VERSION <= 10 +#include +#else +#include +#endif +#include "cintrf.h" + +int FcusparseCreate(); +int FcusparseDestroy(); +cusparseHandle_t *getHandle(); + +#define CHECK_CUDA(func) \ +{ \ + cudaError_t status = (func); \ + if (status != cudaSuccess) { \ + printf("CUDA API failed at line %d with error: %s (%d)\n", \ + __LINE__, cudaGetErrorString(status), status); \ + return EXIT_FAILURE; \ + } \ +} + +#define CHECK_CUSPARSE(func) \ +{ \ + cusparseStatus_t status = (func); \ + if (status != CUSPARSE_STATUS_SUCCESS) { \ + printf("CUSPARSE API failed at line %d with error: %s (%d)\n", \ + __LINE__, cusparseGetErrorString(status), status); \ + return EXIT_FAILURE; \ + } \ +} + +#endif +#endif diff --git a/gpu/fcusparse_fct.h b/gpu/fcusparse_fct.h new file mode 100644 index 00000000..5a3b1ac6 --- /dev/null +++ b/gpu/fcusparse_fct.h @@ -0,0 +1,770 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +typedef struct T_CSRGDeviceMat +{ +#if CUDA_SHORT_VERSION <= 10 + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; +#elif CUDA_VERSION < 11030 + cusparseMatDescr_t descr; + csrsv2Info_t triang; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#else + cusparseSpMatDescr_t descr; + cusparseSpSVDescr_t *spsvDescr; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#endif + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_CSRGDeviceMat; + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Cmat +{ + T_CSRGDeviceMat *mat; +} T_Cmat; + +#if CUDA_SHORT_VERSION <= 10 +typedef struct T_HYBGDeviceMat +{ + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; + cusparseHybMat_t hybA; + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_HYBGDeviceMat; + + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Hmat +{ + T_HYBGDeviceMat *mat; +} T_Hmat; +#endif + +int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz); +int T_CSRGDeviceFree(T_Cmat *Mat); + + +int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); + +int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz); + +#if CUDA_SHORT_VERSION <= 10 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat); +#elif CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +#endif + + + +#if CUDA_SHORT_VERSION <= 10 + + +int T_HYBGDeviceFree(T_Hmat *Matrix); +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz); +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type); +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix); +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +#endif + +int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_CSRGDeviceMat *cMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); + TYPE ealpha=alpha, ebeta=beta; +#if CUDA_SHORT_VERSION <= 10 + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + return cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY); +#elif CUDA_VERSION < 11030 + size_t bfsz; + vX=x->v_; + vY=y->v_; +#if 1 + CHECK_CUSPARSE(cusparseCsrmvEx_bufferSize(*my_handle,CUSPARSE_ALG_MERGE_PATH, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz, + (const void *) &ealpha,CUSPARSE_BASE_TYPE, + cMat->descr, + (const void *) cMat->val, + CUSPARSE_BASE_TYPE, + (const int *) cMat->irp, + (const int *) cMat->ja, + (const void *) vX, CUSPARSE_BASE_TYPE, + (const void *) &ebeta, CUSPARSE_BASE_TYPE, + (void *) vY, CUSPARSE_BASE_TYPE, + CUSPARSE_BASE_TYPE, &bfsz)); +#else + bfsz=cMat->nz; +#endif + + if (bfsz > cMat->mvbsize) { + if (cMat->mvbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->mvbuffer)); + cMat->mvbuffer = NULL; + } + CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + cMat->mvbsize = bfsz; + } + CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle, + CUSPARSE_ALG_MERGE_PATH, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz, + (const void *) &ealpha,CUSPARSE_BASE_TYPE, + cMat->descr, + (const void *) cMat->val, CUSPARSE_BASE_TYPE, + (const int *) cMat->irp, (const int *) cMat->ja, + (const void *) vX, CUSPARSE_BASE_TYPE, + (const void *) &ebeta, CUSPARSE_BASE_TYPE, + (void *) vY, CUSPARSE_BASE_TYPE, + CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); + +#else + cusparseDnVecDescr_t vecX, vecY; + size_t bfsz; + vX=x->v_; + vY=y->v_; + CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,cMat->descr,vecX,&beta,vecY, + CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, + &bfsz)); + if (bfsz > cMat->mvbsize) { + if (cMat->mvbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->mvbuffer)); + cMat->mvbuffer = NULL; + } + CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + cMat->mvbsize = bfsz; + } + CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,cMat->descr,vecX,&beta,vecY, + CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, + cMat->mvbuffer)); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); +#endif +} + +int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_CSRGDeviceMat *cMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); +#if CUDA_SHORT_VERSION <= 10 + vX=x->v_; + vY=y->v_; + + return cusparseTcsrsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, cMat->triang, + (const TYPE *) vX, (TYPE *) vY); +#elif CUDA_VERSION < 11030 + vX=x->v_; + vY=y->v_; + CHECK_CUSPARSE(cusparseTcsrsv2_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, + (const TYPE *) &alpha, + cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, + (const TYPE *) vX, (TYPE *) vY, + CUSPARSE_SOLVE_POLICY_USE_LEVEL, + (void *) cMat->svbuffer)); +#else + cusparseDnVecDescr_t vecX, vecY; + size_t bfsz; + vX=x->v_; + vY=y->v_; + cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,cMat->descr,vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + &bfsz)); + if (bfsz > cMat->svbsize) { + if (cMat->svbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->svbuffer)); + cMat->svbuffer = NULL; + } + CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + cMat->svbsize=bfsz; + } + if (cMat->spsvDescr==NULL) { + CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + cMat->descr, + vecX, vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + cMat->svbuffer)); + } + + CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,cMat->descr,vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr))); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); +#endif +} + +int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) +{ + T_CSRGDeviceMat *cMat; + int nr1=nr, nz1=nz, rc; + cusparseHandle_t *my_handle=getHandle(); + int bfsz; + + if ((nr<0)||(nc<0)||(nz<0)) + return((int) CUSPARSE_STATUS_INVALID_VALUE); + if ((cMat=(T_CSRGDeviceMat *) malloc(sizeof(T_CSRGDeviceMat)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + cMat->m = nr; + cMat->n = nc; + cMat->nz = nz; + if (nr1 == 0) nr1 = 1; + if (nz1 == 0) nz1 = 1; + if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), ((nr1+1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), ((nz1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), ((nz1)*sizeof(TYPE)))) != 0) + return(rc); +#if CUDA_SHORT_VERSION <= 10 + if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) + return(rc); + if ((rc= cusparseCreateSolveAnalysisInfo(&(cMat->triang))) !=0) + return(rc); +#elif CUDA_VERSION < 11030 + if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) + return(rc); + CHECK_CUSPARSE(cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL)); + CHECK_CUSPARSE(cusparseSetMatDiagType(cMat->descr,CUSPARSE_DIAG_TYPE_NON_UNIT)); + CHECK_CUSPARSE(cusparseSetMatIndexBase(cMat->descr,CUSPARSE_INDEX_BASE_ONE)); + CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&(cMat->triang))); + if (cMat->nz > 0) { + CHECK_CUSPARSE(cusparseTcsrsv2_bufferSize(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, &bfsz)); + } else { + bfsz = 0; + } + + /* if (cMat->svbuffer != NULL) { */ + /* fprintf(stderr,"Calling cudaFree\n"); */ + /* CHECK_CUDA(cudaFree(cMat->svbuffer)); */ + /* cMat->svbuffer = NULL; */ + /* } */ + if (bfsz > 0) { + CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + } else { + cMat->svbuffer=NULL; + } + cMat->svbsize=bfsz; + + cMat->mvbuffer=NULL; + cMat->mvbsize = 0; + + +#else + int64_t rows=nr, cols=nc, nnz=nz; + + CHECK_CUSPARSE(cusparseCreateCsr(&(cMat->descr), + rows, cols, nnz, + (void *) cMat->irp, + (void *) cMat->ja, + (void *) cMat->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ONE, + CUSPARSE_BASE_TYPE) ); + cMat->spsvDescr=NULL; + cMat->mvbuffer=NULL; + cMat->svbuffer=NULL; + cMat->mvbsize=0; + cMat->svbsize=0; +#endif + Matrix->mat = cMat; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDeviceFree(T_Cmat *Matrix) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + freeRemoteBuffer(cMat->irp); + freeRemoteBuffer(cMat->ja); + freeRemoteBuffer(cMat->val); +#if CUDA_SHORT_VERSION <= 10 + cusparseDestroyMatDescr(cMat->descr); + cusparseDestroySolveAnalysisInfo(cMat->triang); +#elif CUDA_VERSION < 11030 + cusparseDestroyMatDescr(cMat->descr); + cusparseDestroyCsrsv2Info(cMat->triang); +#else + cusparseDestroySpMat(cMat->descr); + if (cMat->spsvDescr!=NULL) { + CHECK_CUSPARSE( cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); + free(cMat->spsvDescr); + cMat->spsvDescr=NULL; + } + if (cMat->mvbuffer!=NULL) + CHECK_CUDA( cudaFree(cMat->mvbuffer)); + if (cMat->svbuffer!=NULL) + CHECK_CUDA( cudaFree(cMat->svbuffer)); + cMat->spsvDescr=NULL; + cMat->mvbuffer=NULL; + cMat->svbuffer=NULL; + cMat->mvbsize=0; + cMat->svbsize=0; +#endif + free(cMat); + Matrix->mat = NULL; + } + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDeviceGetParms(T_Cmat *Matrix,int *nr, int *nc, int *nz) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + *nr = cMat->m ; + *nc = cMat->n ; + *nz = cMat->nz ; + return(CUSPARSE_STATUS_SUCCESS); + } else { + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + } +} + +#if CUDA_SHORT_VERSION <= 10 + +int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(cMat->descr,type)); +} + +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Matrix) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + int rc, buffersize; + cusparseHandle_t *my_handle=getHandle(); + cusparseSolveAnalysisInfo_t info; + + rc= (int) cusparseTcsrsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang); + if (rc !=0) { + fprintf(stderr,"From csrsv_analysis: %d\n",rc); + } + return(rc); +} + +#elif CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(cMat->descr,type)); +} + +#else + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseFillMode_t mode=type; + + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CUSPARSE_SPMAT_FILL_MODE, + (const void*) &mode, + sizeof(cusparseFillMode_t))); + return(0); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseDiagType_t cutype=type; + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CUSPARSE_SPMAT_DIAG_TYPE, + (const void*) &cutype, + sizeof(cusparseDiagType_t))); + return(0); +} + +#endif + +int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseHandle_t *my_handle=getHandle(); + + if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp, + (m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja, + (nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val, + (nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); +#if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030) + if (cusparseGetMatType(cMat->descr)== CUSPARSE_MATRIX_TYPE_TRIANGULAR) { + // Why do we need to set TYPE_GENERAL??? cuSPARSE can be misterious sometimes. + cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); + CHECK_CUSPARSE(cusparseTcsrsv2_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL, + cMat->svbuffer)); + } +#endif + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; + T_CSRGDeviceMat *cMat = Matrix->mat; + + if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, (m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, (nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, (nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); + + return(CUSPARSE_STATUS_SUCCESS); +} + +#if CUDA_SHORT_VERSION <= 10 +int T_HYBGDeviceFree(T_Hmat *Matrix) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + if (hMat != NULL) { + cusparseDestroyMatDescr(hMat->descr); + cusparseDestroySolveAnalysisInfo(hMat->triang); + cusparseDestroyHybMat(hMat->hybA); + free(hMat); + } + Matrix->mat = NULL; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_HYBGDeviceMat *hMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n,rc; + cusparseMatrixType_t type; + cusparseHandle_t *my_handle=getHandle(); + + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Spmv MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Spmv DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Spmv FillMode: %d\n",rc); */ + /* Dirty trick: apparently hybmv does not accept a triangular + matrix even though it should not make a difference. So + we claim it's general anyway */ + type = cusparseGetMatType(hMat->descr); + rc = cusparseSetMatType(hMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); + if (rc == 0) + rc = (int) cusparseThybmv(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + (const TYPE *) &alpha, hMat->descr, hMat->hybA, + (const TYPE *) vX, (const TYPE *) &beta, + (TYPE *) vY); + if (rc == 0) + rc = cusparseSetMatType(hMat->descr,type); + return(rc); +} + +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz) +{ + T_HYBGDeviceMat *hMat; + int nr1=nr, nz1=nz, rc; + if ((nr<0)||(nc<0)||(nz<0)) + return((int) CUSPARSE_STATUS_INVALID_VALUE); + if ((hMat=(T_HYBGDeviceMat *) malloc(sizeof(T_HYBGDeviceMat)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + hMat->m = nr; + hMat->n = nc; + hMat->nz = nz; + + if ((rc= cusparseCreateMatDescr(&(hMat->descr))) !=0) + return(rc); + if ((rc= cusparseCreateSolveAnalysisInfo(&(hMat->triang))) !=0) + return(rc); + if((rc = cusparseCreateHybMat(&(hMat->hybA))) != 0) + return(rc); + Matrix->mat = hMat; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatType(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(hMat->descr,type)); +} + +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + //beta?? + T_HYBGDeviceMat *hMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + return cusparseThybsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + (const TYPE *) &alpha, hMat->descr, + hMat->hybA, hMat->triang, + (const TYPE *) vX, (TYPE *) vY); +} + +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + cusparseSolveAnalysisInfo_t info; + int rc; + cusparseHandle_t *my_handle=getHandle(); + + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Analysis MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Analysis DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Analysis FillMode: %d\n",rc); */ + rc = (int) cusparseThybsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + hMat->descr, hMat->hybA, hMat->triang); + + if (rc !=0) { + fprintf(stderr,"From csrsv_analysis: %d\n",rc); + } + return(rc); +} + +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; double t1,t2; + int nr1=m, nz1=nz; + T_HYBGDeviceMat *hMat= Matrix->mat; + cusparseHandle_t *my_handle=getHandle(); + + if (nr1 == 0) nr1 = 1; + if (nz1 == 0) nz1 = 1; + if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), ((nr1+1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), ((nz1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), ((nz1)*sizeof(TYPE)))) != 0) + return(rc); + + if ((rc=writeRemoteBuffer((void *) irp, (void *) hMat->irp, + (m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=writeRemoteBuffer((void *) ja,(void *) hMat->ja, + (nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=writeRemoteBuffer((void *) val, (void *) hMat->val, + (nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Conversion MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Conversion DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Conversion FillMode: %d\n",rc); */ + //t1=etime(); + rc = (int) cusparseTcsr2hyb(*my_handle, m, n, + hMat->descr, + (const TYPE *)hMat->val, + (const int *)hMat->irp, (const int *)hMat->ja, + hMat->hybA,0, + CUSPARSE_HYB_PARTITION_AUTO); + + freeRemoteBuffer(hMat->irp); hMat->irp = NULL; + freeRemoteBuffer(hMat->ja); hMat->ja = NULL; + freeRemoteBuffer(hMat->val); hMat->val = NULL; + + //cudaSync(); + //t2 = etime(); + //fprintf(stderr,"Inner call to cusparseTcsr2hyb: %lf\n",(t2-t1)); + if (rc != 0) { + fprintf(stderr,"From csr2hyb: %d\n",rc); + } + return(rc); +} +#endif + diff --git a/gpu/hdiagdev.c b/gpu/hdiagdev.c new file mode 100644 index 00000000..4e640278 --- /dev/null +++ b/gpu/hdiagdev.c @@ -0,0 +1,425 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "hdiagdev.h" +#include +#include +#include +#include +#if defined(HAVE_SPGPU) +#define DEBUG 0 + + +void freeHdiagDevice(void* remoteMatrix) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->hackOffsets); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + + +HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType) +{ + HdiagDeviceParams params; + + params.elementType = elementType; + //numero di elementi di val + params.rows = rows; + params.columns = columns; + params.allocationHeight = allocationHeight; + params.hackSize = hackSize; + params.hackCount = hackCount; + + return params; + +} + +int allocHdiagDevice(void **remoteMatrix, HdiagDeviceParams* params) +{ + struct HdiagDevice *tmp = (struct HdiagDevice *)malloc(sizeof(struct HdiagDevice)); + int ret=SPGPU_SUCCESS; + int *tmpOff = NULL; + + *remoteMatrix = (void *) tmp; +#if DEBUG + fprintf(stderr,"From alloc: %p\n",*remoteMatrix); +#endif + + tmp->rows = params->rows; + + tmp->hackSize = params->hackSize; + + tmp->cols = params->columns; + + tmp->allocationHeight = params->allocationHeight; + + tmp->hackCount = params->hackCount; + + + +#if DEBUG + fprintf(stderr,"hackcount %d allocationHeight %d\n",tmp->hackCount,tmp->allocationHeight); +#endif + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hackOffsets), (tmp->hackCount+1)*sizeof(int)); + + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets), tmp->allocationHeight*sizeof(int)); + + /* tmp->baseIndex = params->firstIndex; */ + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType) +{ int i=0; +#ifdef HAVE_SPGPU + HdiagDeviceParams p; + + p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType); + + i = allocHdiagDevice(deviceMat, &p); +#if DEBUG + fprintf(stderr," Falloc %p \n",*deviceMat); +#endif + + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif + + +} + +int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets) +{ int i=0,fo,fa,j,k,p; + char buf_a[255], buf_o[255],tmp[255]; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + i=SPGPU_SUCCESS; + + +#if DEBUG + fprintf(stderr," Write %p \n",devMat); + + fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n", + devMat->allocationHeight,devMat->hackCount); + fprintf(stderr,"HackOffsets: "); + for (j=0; jhackCount+1; j++) + fprintf(stderr," %d",hackOffsets[j]); + fprintf(stderr,"\n"); + fprintf(stderr,"diaOffsets: "); + for (j=0; jallocationHeight; j++) + fprintf(stderr," %d",hdiaOffsets[j]); + fprintf(stderr,"\n"); +#if 1 + fprintf(stderr,"values: \n"); + p=0; + for (j=0; jhackCount; j++){ + fprintf(stderr,"Hack no: %d\n",j+1); + for (k=0; khackSize*(devMat->allocationHeight/devMat->hackCount); k++){ + fprintf(stderr," %d %lf\n",p+1,val[p]); p++; + } + } + fprintf(stderr,"\n"); +#endif +#endif + + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, + (devMat->hackCount+1)*sizeof(int)); + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, + devMat->allocationHeight*sizeof(int)); + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + devMat->allocationHeight*devMat->hackSize*sizeof(double)); + if (i!=0) + fprintf(stderr,"Error in writeHdiagDeviceDouble %d\n",i); + +#if DEBUG + fprintf(stderr," EndWrite %p \n",devMat); +#endif + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +long long int sizeofHdiagDeviceDouble(void* deviceMat) +{ int i=0,fo,fa; + int *hoff=NULL,*hackoff=NULL; + long long int memsize=0; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + + memsize += (devMat->hackCount+1)*sizeof(int); + memsize += devMat->allocationHeight*sizeof(int); + memsize += devMat->allocationHeight*devMat->hackSize*sizeof(double); + +#endif + return(memsize); +} + + + +int readHdiagDeviceDouble(void* deviceMat, double* a, int* off) +{ int i; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); */ + /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ + + + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif +#if DEBUG + fprintf(stderr," First %p \n",devMat); + fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize, + devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM); +#endif + spgpuDhdiaspmv (handle, (double*)y->v_, (double *)y->v_, alpha, + (double *)devMat->cM,devMat->hdiaOffsets, + devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols, + x->v_, beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets) +{ int i=0,fo,fa,j,k,p; + char buf_a[255], buf_o[255],tmp[255]; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + i=SPGPU_SUCCESS; + + +#if DEBUG + fprintf(stderr," Write %p \n",devMat); + + fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n", + devMat->allocationHeight,devMat->hackCount); + fprintf(stderr,"HackOffsets: "); + for (j=0; jhackCount+1; j++) + fprintf(stderr," %d",hackOffsets[j]); + fprintf(stderr,"\n"); + fprintf(stderr,"diaOffsets: "); + for (j=0; jallocationHeight; j++) + fprintf(stderr," %d",hdiaOffsets[j]); + fprintf(stderr,"\n"); +#if 1 + fprintf(stderr,"values: \n"); + p=0; + for (j=0; jhackCount; j++){ + fprintf(stderr,"Hack no: %d\n",j+1); + for (k=0; khackSize*(devMat->allocationHeight/devMat->hackCount); k++){ + fprintf(stderr," %d %lf\n",p+1,val[p]); p++; + } + } + fprintf(stderr,"\n"); +#endif +#endif + + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, + (devMat->hackCount+1)*sizeof(int)); + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, + devMat->allocationHeight*sizeof(int)); + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + devMat->allocationHeight*devMat->hackSize*sizeof(float)); + if (i!=0) + fprintf(stderr,"Error in writeHdiagDeviceFloat %d\n",i); + +#if DEBUG + fprintf(stderr," EndWrite %p \n",devMat); +#endif + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +long long int sizeofHdiagDeviceFloat(void* deviceMat) +{ int i=0,fo,fa; + int *hoff=NULL,*hackoff=NULL; + long long int memsize=0; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + + memsize += (devMat->hackCount+1)*sizeof(int); + memsize += devMat->allocationHeight*sizeof(int); + memsize += devMat->allocationHeight*devMat->hackSize*sizeof(float); + +#endif + return(memsize); +} + + + +int readHdiagDeviceFloat(void* deviceMat, float* a, int* off) +{ int i; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); */ + /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ + + + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif +#if DEBUG + fprintf(stderr," First %p \n",devMat); + fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize, + devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM); +#endif + spgpuShdiaspmv (handle, (float*)y->v_, (float *)y->v_, alpha, + (float *)devMat->cM,devMat->hdiaOffsets, + devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols, + x->v_, beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +#endif diff --git a/gpu/hdiagdev.h b/gpu/hdiagdev.h new file mode 100644 index 00000000..4bce5066 --- /dev/null +++ b/gpu/hdiagdev.h @@ -0,0 +1,111 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _HDIAGDEV_H_ +#define _HDIAGDEV_H_ + +#ifdef HAVE_SPGPU +#include "cintrf.h" +#include "hdia.h" + +struct HdiagDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // offset (same size of cM) + int *hdiaOffsets; + + int *hackOffsets; + + int hackCount; + + int rows; + + int cols; + + + int hackSize; + + int allocationHeight; + +}; + +typedef struct HdiagDeviceParams +{ + + unsigned int elementType; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + unsigned int hackSize; + unsigned int hackCount; + unsigned int allocationHeight; + + +} HdiagDeviceParams; + + + +HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType); + +int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType); + +int allocHdiagDevice(void ** remoteMatrix, HdiagDeviceParams* params); + + +void freeHdiagDevice(void* remoteMatrix); + +int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets); +int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); + +int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets); +int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); + + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/hdiagdev_mod.F90 b/gpu/hdiagdev_mod.F90 new file mode 100644 index 00000000..ad0f7cc5 --- /dev/null +++ b/gpu/hdiagdev_mod.F90 @@ -0,0 +1,203 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module hdiagdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: hdiagdev_parms + integer(c_int) :: element_type + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: hackSize + integer(c_int) :: hackCount + integer(c_int) :: allocationHeight + end type hdiagdev_parms + +#ifdef HAVE_SPGPU + + ! interface computeHdiaHacksCount + ! function computeHdiaHacksCountDouble(allocationHeight,hackOffsets,hackSize, & + ! & diaValues,diaValuesPitch,diags,rows)& + ! & result(res) bind(c,name='computeHdiaHackOffsetsDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! integer(c_int), value :: rows,diags,diaValuesPitch,hackSize,elementType + ! real(c_double) :: diaValues(rows,:) + ! integer(c_int) :: hackOffsets,allocationHeight + ! end function computeHdiaHacksCountDouble + ! end interface computeHdiaHacksCount + + interface + function FgetHdiagDeviceParams(rows, columns, allocationHeight,hackSize, & + & hackCount, elementType) & + & result(res) bind(c,name='getHdiagDeviceParams') + use iso_c_binding + import :: hdiagdev_parms + type(hdiagdev_parms) :: res + integer(c_int), value :: rows,columns,allocationHeight,& + & elementType,hackSize,hackCount + end function FgetHdiagDeviceParams + end interface + + + interface + function FallocHdiagDevice(deviceMat,rows,columns,allocationHeight,& + & hackSize,hackCount,elementType) & + & result(res) bind(c,name='FallocHdiagDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,allocationHeight,hackSize,& + & hackCount,elementType + type(c_ptr) :: deviceMat + end function FallocHdiagDevice + end interface + + + interface + function sizeofHdiagDeviceDouble(deviceMat) & + & result(res) bind(c,name='sizeofHdiagDeviceDouble') + use iso_c_binding + integer(c_long_long) :: res + type(c_ptr), value :: deviceMat + end function sizeofHdiagDeviceDouble + end interface + + interface writeHdiagDevice + + function writeHdiagDeviceFloat(deviceMat,val,hdiaOffsets, hackOffsets) & + & result(res) bind(c,name='writeHdiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: hdiaOffsets(*), hackOffsets(*) + end function writeHdiagDeviceFloat + + function writeHdiagDeviceDouble(deviceMat,val,hdiaOffsets, hackOffsets) & + & result(res) bind(c,name='writeHdiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: hdiaOffsets(*), hackOffsets(*) + end function writeHdiagDeviceDouble + + end interface writeHdiagDevice + +!!$ interface readHdiagDevice +!!$ +!!$ function readHdiagDeviceFloat(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceFloat') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ real(c_float) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceFloat +!!$ +!!$ function readHdiagDeviceDouble(deviceMat,a,off,n) & +!!$ & result(res) bind(c,name='readHdiagDeviceDouble') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int),value :: n +!!$ real(c_double) :: a(n,*) +!!$ integer(c_int) :: off(*) +!!$ end function readHdiagDeviceDouble +!!$ +!!$ function readHdiagDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ complex(c_float_complex) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceFloatComplex +!!$ +!!$ function readHdiagDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ complex(c_double_complex) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceDoubleComplex +!!$ +!!$ end interface readHdiagDevice +!!$ + interface + subroutine freeHdiagDevice(deviceMat) & + & bind(c,name='freeHdiagDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeHdiagDevice + end interface + + + interface spmvHdiagDevice + function spmvHdiagDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHdiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvHdiagDeviceFloat + function spmvHdiagDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHdiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvHdiagDeviceDouble +!!$ function spmvHdiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) & +!!$ & result(res) bind(c,name='spmvHdiagDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat, x, y +!!$ complex(c_float_complex),value :: alpha, beta +!!$ end function spmvHdiagDeviceFloatComplex +!!$ function spmvHdiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & +!!$ & result(res) bind(c,name='spmvHdiagDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat, x, y +!!$ complex(c_double_complex),value :: alpha, beta +!!$ end function spmvHdiagDeviceDoubleComplex + end interface spmvHdiagDevice + +#endif + +end module hdiagdev_mod diff --git a/gpu/hlldev.c b/gpu/hlldev.c new file mode 100644 index 00000000..afff1df1 --- /dev/null +++ b/gpu/hlldev.c @@ -0,0 +1,615 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "hlldev.h" +#if defined(HAVE_SPGPU) +//new +HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, unsigned int elementType, unsigned int firstIndex) +{ + HllDeviceParams params; + + params.elementType = elementType; + params.hackSize = hksize; + //numero di elementi di val + params.allocsize = allocsize; + params.rows = rows; + params.nzt = nzeros; + params.avgNzr = (nzeros+rows-1)/rows; + params.firstIndex = firstIndex; + return params; + +} + +int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros, + int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr) +{ + + + if (mat!=NULL) { + *hackOffsLength = mat->hackOffsLength ; + *hksize = mat->hackSize ; + *nzeros = mat->nzt ; + *allocsize = mat->allocsize ; + *rows = mat->rows ; + *avgnzr = mat->avgNzr ; + *firstIndex = mat->baseIndex ; + return SPGPU_SUCCESS; + } else { + return SPGPU_UNSUPPORTED; + } +} +//new +int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params) +{ + HllDevice *tmp = (HllDevice *)malloc(sizeof(HllDevice)); + int ret=SPGPU_SUCCESS; + *remoteMatrix = (void *)tmp; + + tmp->hackSize = params->hackSize; + + tmp->allocsize = params->allocsize; + + tmp->rows = params->rows; + tmp->avgNzr = params->avgNzr; + tmp->nzt = params->nzt; + tmp->baseIndex = params->firstIndex; + //fprintf(stderr,"Allocating HLG with %d avgNzr\n",params->avgNzr); + tmp->hackOffsLength = (int)(tmp->rows+tmp->hackSize-1)/tmp->hackSize; + + //printf("hackOffsLength %d\n",tmp->hackOffsLength); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hackOffs), ((tmp->hackOffsLength+1)*sizeof(int))); + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +void freeHllDevice(void* remoteMatrix) +{ + HllDevice *devMat = (HllDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->rS); + freeRemoteBuffer(devMat->diag); + freeRemoteBuffer(devMat->rP); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, + unsigned int elementType, unsigned int firstIndex) +{ int i; +#ifdef HAVE_SPGPU + HllDeviceParams p; + + p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex); + i = allocHllDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuShellspmv (handle, (float *)y->v_, (float *)y->v_, alpha, (float *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +//new +int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex); + //cudaSync(); + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, + float complex beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU + cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha)); + cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta)); +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuChellspmv (handle, (cuFloatComplex *)y->v_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, + double complex beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU + cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha)); + cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta)); +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + + spgpuZhellspmv (handle, (cuDoubleComplex *)y->v_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + //i = writeEllDevice(deviceMat, (void *) val, ja, irn); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = readEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +// New copy routines. + +int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float *val, void *deviceMat) +{ int i,j; +#ifdef HAVE_SPGPU + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + float *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_gpuGetHandle(); + psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double *val, void *deviceMat) +{ int i,j; +#ifdef HAVE_SPGPU + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + double *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double)); + //fprintf(stderr,"WriteRemoteBuffer val %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer ja %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer irn %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i); + //cudaSync(); + //fprintf(stderr," hacksz: %d \n",hacksz); + handle = psb_gpuGetHandle(); + psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float complex *val, void *deviceMat) +{ int i,j; +#ifdef HAVE_SPGPU + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + float complex *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_gpuGetHandle(); + psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double complex *val, void *deviceMat) +{ int i,j; +#ifdef HAVE_SPGPU + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + double complex *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_gpuGetHandle(); + psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + + + +#endif diff --git a/gpu/hlldev.h b/gpu/hlldev.h new file mode 100644 index 00000000..478ad86e --- /dev/null +++ b/gpu/hlldev.h @@ -0,0 +1,161 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _HLLDEV_H_ +#define _HLLDEV_H_ + +#ifdef HAVE_SPGPU +#include "cintrf.h" +#include "hell.h" + + +typedef struct hlldevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // row pointers (same size of cM) + int *rP; + + // row size and diagonal position + int *rS; + int *diag; + + int *hackOffs; + + int rows; + int avgNzr; + int hackOffsLength; + int nzt; + int hackSize; //must be multiple of 32 + + //matrix size (uncompressed) + //int rows; + //int columns; + + //allocation size + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; +} HllDevice; + +typedef struct hlldeviceparams +{ + + unsigned int elementType; + + unsigned int hackSize; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + unsigned int avgNzr; + unsigned int nzt; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + // unsigned int columns; + + unsigned int allocsize; + + // First index (e.g 0 or 1) + unsigned int firstIndex; + +} HllDeviceParams; + + +HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, + unsigned int elementType, unsigned int firstIndex); +int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros, + int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr); +int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, unsigned int elementType, unsigned int firstIndex); +int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params); +void freeHllDevice(void* remoteMatrix); +int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceFloatComplex(void* deviceMat, float complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); + + +int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float *val, void *deviceMat); +int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double *val, void *deviceMat); +int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, + int noffs, int isz, int *irn, + int *hoffs, int *idisp, int *ja, + float complex *val, void *deviceMat); +int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, + int noffs, int isz, int *irn, + int *hoffs, int *idisp, int *ja, + double complex *val, void *deviceMat); + +int psi_cuda_s_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, float *val, + int *idiag, int *rP, float *cM); +int psi_cuda_d_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, double *val, + int *idiag, int *rP, double *cM); +int psi_cuda_c_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, float complex *val, + int *idiag, int *rP, float complex *cM); +int psi_cuda_z_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, double complex *val, + int *idiag, int *rP, double complex *cM); + + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/hlldev_mod.F90 b/gpu/hlldev_mod.F90 new file mode 100644 index 00000000..4eaa5ce0 --- /dev/null +++ b/gpu/hlldev_mod.F90 @@ -0,0 +1,273 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module hlldev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: hlldev_parms + integer(c_int) :: element_type + integer(c_int) :: hackSize + integer(c_int) :: rows + integer(c_int) :: avgNzr + integer(c_int) :: allocsize + integer(c_int) :: firstIndex + end type hlldev_parms + +#ifdef HAVE_SPGPU + + interface + function bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex) & + & result(res) bind(c,name='bldHllDeviceParams') + use iso_c_binding + import :: hlldev_parms + type(hlldev_parms) :: res + integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex + end function BldHllDeviceParams + end interface + + interface + function getHllDeviceParams(deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) & + & result(res) bind(c,name='getHllDeviceParams') + use iso_c_binding + import :: hlldev_parms + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + end function GetHllDeviceParams + end interface + + + interface + function FallocHllDevice(deviceMat,hksize,rows, nzeros,allocsize, & + & elementType,firstIndex) & + & result(res) bind(c,name='FallocHllDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocHllDevice + end interface + + + interface writeHllDevice + + function writeHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceFloat + + function writeHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceDouble + + function writeHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceFloatComplex + + function writeHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceDoubleComplex + + end interface + + interface readHllDevice + + function readHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceFloat + + function readHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceDouble + + function readHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceFloatComplex + + function readHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceDoubleComplex + + end interface + + interface + subroutine freeHllDevice(deviceMat) & + & bind(c,name='freeHllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeHllDevice + end interface + + + interface psi_CopyCooToHlg + function psiCopyCooToHlgFloat(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgFloat + function psiCopyCooToHlgDouble(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgDouble + function psiCopyCooToHlgFloatComplex(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgFloatComplex + function psiCopyCooToHlgDoubleComplex(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgDoubleComplex + end interface + + + !interface + ! function getHllDevicePitch(deviceMat) & + ! & bind(c,name='getHllDevicePitch') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getHllDevicePitch + !end interface + + !interface + ! function getHllDeviceMaxRowSize(deviceMat) & + ! & bind(c,name='getHllDeviceMaxRowSize') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getHllDeviceMaxRowSize + !end interface + + interface spmvHllDevice + + function spmvHllDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvHllDeviceFloat + + function spmvHllDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvHllDeviceDouble + + function spmvHllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvHllDeviceFloatComplex + + function spmvHllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvHllDeviceDoubleComplex + + end interface + +#endif + + +end module hlldev_mod diff --git a/gpu/impl/Makefile b/gpu/impl/Makefile new file mode 100755 index 00000000..158066f2 --- /dev/null +++ b/gpu/impl/Makefile @@ -0,0 +1,294 @@ +include ../../Make.inc +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. +CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR) +LIBNAME=libpsb_gpu.a + +OBJS= \ +psb_d_cp_csrg_from_coo.o \ +psb_d_cp_csrg_from_fmt.o \ +psb_d_cp_elg_from_coo.o \ +psb_d_cp_elg_from_fmt.o \ +psb_s_cp_csrg_from_coo.o \ +psb_s_cp_csrg_from_fmt.o \ +psb_s_csrg_allocate_mnnz.o \ +psb_s_csrg_csmm.o \ +psb_s_csrg_csmv.o \ +psb_s_csrg_mold.o \ +psb_s_csrg_reallocate_nz.o \ +psb_s_csrg_scal.o \ +psb_s_csrg_scals.o \ +psb_s_csrg_from_gpu.o \ +psb_s_csrg_to_gpu.o \ +psb_s_csrg_vect_mv.o \ +psb_s_csrg_inner_vect_sv.o \ +psb_d_csrg_allocate_mnnz.o \ +psb_d_csrg_csmm.o \ +psb_d_csrg_csmv.o \ +psb_d_csrg_mold.o \ +psb_d_csrg_reallocate_nz.o \ +psb_d_csrg_scal.o \ +psb_d_csrg_scals.o \ +psb_d_csrg_from_gpu.o \ +psb_d_csrg_to_gpu.o \ +psb_d_csrg_vect_mv.o \ +psb_d_csrg_inner_vect_sv.o \ +psb_d_elg_allocate_mnnz.o \ +psb_d_elg_asb.o \ +psb_d_elg_csmm.o \ +psb_d_elg_csmv.o \ +psb_d_elg_csput.o \ +psb_d_elg_from_gpu.o \ +psb_d_elg_inner_vect_sv.o \ +psb_d_elg_mold.o \ +psb_d_elg_reallocate_nz.o \ +psb_d_elg_scal.o \ +psb_d_elg_scals.o \ +psb_d_elg_to_gpu.o \ +psb_d_elg_vect_mv.o \ +psb_d_mv_csrg_from_coo.o \ +psb_d_mv_csrg_from_fmt.o \ +psb_d_mv_elg_from_coo.o \ +psb_d_mv_elg_from_fmt.o \ +psb_s_mv_csrg_from_coo.o \ +psb_s_mv_csrg_from_fmt.o \ +psb_s_cp_elg_from_coo.o \ +psb_s_cp_elg_from_fmt.o \ +psb_s_elg_allocate_mnnz.o \ +psb_s_elg_asb.o \ +psb_s_elg_csmm.o \ +psb_s_elg_csmv.o \ +psb_s_elg_csput.o \ +psb_s_elg_inner_vect_sv.o \ +psb_s_elg_mold.o \ +psb_s_elg_reallocate_nz.o \ +psb_s_elg_scal.o \ +psb_s_elg_scals.o \ +psb_s_elg_to_gpu.o \ +psb_s_elg_from_gpu.o \ +psb_s_elg_vect_mv.o \ +psb_s_mv_elg_from_coo.o \ +psb_s_mv_elg_from_fmt.o \ +psb_s_cp_hlg_from_fmt.o \ +psb_s_cp_hlg_from_coo.o \ +psb_d_cp_hlg_from_fmt.o \ +psb_d_cp_hlg_from_coo.o \ +psb_d_hlg_allocate_mnnz.o \ +psb_d_hlg_csmm.o \ +psb_d_hlg_csmv.o \ +psb_d_hlg_inner_vect_sv.o \ +psb_d_hlg_mold.o \ +psb_d_hlg_reallocate_nz.o \ +psb_d_hlg_scal.o \ +psb_d_hlg_scals.o \ +psb_d_hlg_from_gpu.o \ +psb_d_hlg_to_gpu.o \ +psb_d_hlg_vect_mv.o \ +psb_s_hlg_allocate_mnnz.o \ +psb_s_hlg_csmm.o \ +psb_s_hlg_csmv.o \ +psb_s_hlg_inner_vect_sv.o \ +psb_s_hlg_mold.o \ +psb_s_hlg_reallocate_nz.o \ +psb_s_hlg_scal.o \ +psb_s_hlg_scals.o \ +psb_s_hlg_from_gpu.o \ +psb_s_hlg_to_gpu.o \ +psb_s_hlg_vect_mv.o \ +psb_s_mv_hlg_from_coo.o \ +psb_s_cp_hlg_from_coo.o \ +psb_s_mv_hlg_from_fmt.o \ +psb_d_mv_hlg_from_coo.o \ +psb_d_cp_hlg_from_coo.o \ +psb_d_mv_hlg_from_fmt.o \ +psb_s_hybg_allocate_mnnz.o \ +psb_s_hybg_csmm.o \ +psb_s_hybg_csmv.o \ +psb_s_hybg_reallocate_nz.o \ +psb_s_hybg_scal.o \ +psb_s_hybg_scals.o \ +psb_s_hybg_to_gpu.o \ +psb_s_hybg_vect_mv.o \ +psb_s_hybg_inner_vect_sv.o \ +psb_s_cp_hybg_from_coo.o \ +psb_s_cp_hybg_from_fmt.o \ +psb_s_mv_hybg_from_fmt.o \ +psb_s_mv_hybg_from_coo.o \ +psb_s_hybg_mold.o \ +psb_d_hybg_allocate_mnnz.o \ +psb_d_hybg_csmm.o \ +psb_d_hybg_csmv.o \ +psb_d_hybg_reallocate_nz.o \ +psb_d_hybg_scal.o \ +psb_d_hybg_scals.o \ +psb_d_hybg_to_gpu.o \ +psb_d_hybg_vect_mv.o \ +psb_d_hybg_inner_vect_sv.o \ +psb_d_cp_hybg_from_coo.o \ +psb_d_cp_hybg_from_fmt.o \ +psb_d_mv_hybg_from_fmt.o \ +psb_d_mv_hybg_from_coo.o \ +psb_d_hybg_mold.o \ +psb_z_cp_csrg_from_coo.o \ +psb_z_cp_csrg_from_fmt.o \ +psb_z_cp_elg_from_coo.o \ +psb_z_cp_elg_from_fmt.o \ +psb_c_cp_csrg_from_coo.o \ +psb_c_cp_csrg_from_fmt.o \ +psb_c_csrg_allocate_mnnz.o \ +psb_c_csrg_csmm.o \ +psb_c_csrg_csmv.o \ +psb_c_csrg_mold.o \ +psb_c_csrg_reallocate_nz.o \ +psb_c_csrg_scal.o \ +psb_c_csrg_scals.o \ +psb_c_csrg_from_gpu.o \ +psb_c_csrg_to_gpu.o \ +psb_c_csrg_vect_mv.o \ +psb_c_csrg_inner_vect_sv.o \ +psb_z_csrg_allocate_mnnz.o \ +psb_z_csrg_csmm.o \ +psb_z_csrg_csmv.o \ +psb_z_csrg_mold.o \ +psb_z_csrg_reallocate_nz.o \ +psb_z_csrg_scal.o \ +psb_z_csrg_scals.o \ +psb_z_csrg_from_gpu.o \ +psb_z_csrg_to_gpu.o \ +psb_z_csrg_vect_mv.o \ +psb_z_csrg_inner_vect_sv.o \ +psb_z_elg_allocate_mnnz.o \ +psb_z_elg_asb.o \ +psb_z_elg_csmm.o \ +psb_z_elg_csmv.o \ +psb_z_elg_csput.o \ +psb_z_elg_inner_vect_sv.o \ +psb_z_elg_mold.o \ +psb_z_elg_reallocate_nz.o \ +psb_z_elg_scal.o \ +psb_z_elg_scals.o \ +psb_z_elg_to_gpu.o \ +psb_z_elg_from_gpu.o \ +psb_z_elg_vect_mv.o \ +psb_z_mv_csrg_from_coo.o \ +psb_z_mv_csrg_from_fmt.o \ +psb_z_mv_elg_from_coo.o \ +psb_z_mv_elg_from_fmt.o \ +psb_c_mv_csrg_from_coo.o \ +psb_c_mv_csrg_from_fmt.o \ +psb_c_cp_elg_from_coo.o \ +psb_c_cp_elg_from_fmt.o \ +psb_c_elg_allocate_mnnz.o \ +psb_c_elg_asb.o \ +psb_c_elg_csmm.o \ +psb_c_elg_csmv.o \ +psb_c_elg_csput.o \ +psb_c_elg_inner_vect_sv.o \ +psb_c_elg_mold.o \ +psb_c_elg_reallocate_nz.o \ +psb_c_elg_scal.o \ +psb_c_elg_scals.o \ +psb_c_elg_to_gpu.o \ +psb_c_elg_from_gpu.o \ +psb_c_elg_vect_mv.o \ +psb_c_mv_elg_from_coo.o \ +psb_c_mv_elg_from_fmt.o \ +psb_c_cp_hlg_from_fmt.o \ +psb_c_cp_hlg_from_coo.o \ +psb_z_cp_hlg_from_fmt.o \ +psb_z_cp_hlg_from_coo.o \ +psb_z_hlg_allocate_mnnz.o \ +psb_z_hlg_csmm.o \ +psb_z_hlg_csmv.o \ +psb_z_hlg_inner_vect_sv.o \ +psb_z_hlg_mold.o \ +psb_z_hlg_reallocate_nz.o \ +psb_z_hlg_scal.o \ +psb_z_hlg_scals.o \ +psb_z_hlg_from_gpu.o \ +psb_z_hlg_to_gpu.o \ +psb_z_hlg_vect_mv.o \ +psb_c_hlg_allocate_mnnz.o \ +psb_c_hlg_csmm.o \ +psb_c_hlg_csmv.o \ +psb_c_hlg_inner_vect_sv.o \ +psb_c_hlg_mold.o \ +psb_c_hlg_reallocate_nz.o \ +psb_c_hlg_scal.o \ +psb_c_hlg_scals.o \ +psb_c_hlg_from_gpu.o \ +psb_c_hlg_to_gpu.o \ +psb_c_hlg_vect_mv.o \ +psb_c_mv_hlg_from_coo.o \ +psb_c_cp_hlg_from_coo.o \ +psb_c_mv_hlg_from_fmt.o \ +psb_z_mv_hlg_from_coo.o \ +psb_z_cp_hlg_from_coo.o \ +psb_z_mv_hlg_from_fmt.o \ +psb_c_hybg_allocate_mnnz.o \ +psb_c_hybg_csmm.o \ +psb_c_hybg_csmv.o \ +psb_c_hybg_reallocate_nz.o \ +psb_c_hybg_scal.o \ +psb_c_hybg_scals.o \ +psb_c_hybg_to_gpu.o \ +psb_c_hybg_vect_mv.o \ +psb_c_hybg_inner_vect_sv.o \ +psb_c_cp_hybg_from_coo.o \ +psb_c_cp_hybg_from_fmt.o \ +psb_c_mv_hybg_from_fmt.o \ +psb_c_mv_hybg_from_coo.o \ +psb_c_hybg_mold.o \ +psb_z_hybg_allocate_mnnz.o \ +psb_z_hybg_csmm.o \ +psb_z_hybg_csmv.o \ +psb_z_hybg_reallocate_nz.o \ +psb_z_hybg_scal.o \ +psb_z_hybg_scals.o \ +psb_z_hybg_to_gpu.o \ +psb_z_hybg_vect_mv.o \ +psb_z_hybg_inner_vect_sv.o \ +psb_z_cp_hybg_from_coo.o \ +psb_z_cp_hybg_from_fmt.o \ +psb_z_mv_hybg_from_fmt.o \ +psb_z_mv_hybg_from_coo.o \ +psb_z_hybg_mold.o \ +psb_d_cp_diag_from_coo.o \ +psb_d_mv_diag_from_coo.o \ +psb_d_diag_to_gpu.o \ +psb_d_diag_csmv.o \ +psb_d_diag_mold.o \ +psb_d_diag_vect_mv.o \ +psb_d_cp_hdiag_from_coo.o \ +psb_d_mv_hdiag_from_coo.o \ +psb_d_hdiag_to_gpu.o \ +psb_d_hdiag_csmv.o \ +psb_d_hdiag_mold.o \ +psb_d_hdiag_vect_mv.o \ +psb_s_cp_hdiag_from_coo.o \ +psb_s_mv_hdiag_from_coo.o \ +psb_s_hdiag_to_gpu.o \ +psb_s_hdiag_csmv.o \ +psb_s_hdiag_mold.o \ +psb_s_hdiag_vect_mv.o \ +psb_s_dnsg_mat_impl.o \ +psb_d_dnsg_mat_impl.o \ +psb_c_dnsg_mat_impl.o \ +psb_z_dnsg_mat_impl.o + + +objs: $(OBJS) +lib: objs + ar cur ../$(LIBNAME) $(OBJS) + +clean: + /bin/rm -f $(OBJS) diff --git a/gpu/impl/psb_c_cp_csrg_from_coo.F90 b/gpu/impl/psb_c_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..9ab3b7f0 --- /dev/null +++ b/gpu/impl/psb_c_cp_csrg_from_coo.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cp_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_coo +#else + use psb_c_csrg_mat_mod +#endif + implicit none + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_csrg_from_coo diff --git a/gpu/impl/psb_c_cp_csrg_from_fmt.F90 b/gpu/impl/psb_c_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..5229244f --- /dev/null +++ b/gpu/impl/psb_c_cp_csrg_from_fmt.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_fmt +#else + use psb_c_csrg_mat_mod +#endif + !use iso_c_binding + implicit none + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_c_cp_csrg_from_fmt diff --git a/gpu/impl/psb_c_cp_diag_from_coo.F90 b/gpu/impl/psb_c_cp_diag_from_coo.F90 new file mode 100644 index 00000000..8d196891 --- /dev/null +++ b/gpu/impl/psb_c_cp_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_cp_diag_from_coo +#else + use psb_c_diag_mat_mod +#endif + implicit none + + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_c_dia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_diag_from_coo diff --git a/gpu/impl/psb_c_cp_elg_from_coo.F90 b/gpu/impl/psb_c_cp_elg_from_coo.F90 new file mode 100644 index 00000000..95193c13 --- /dev/null +++ b/gpu/impl/psb_c_cp_elg_from_coo.F90 @@ -0,0 +1,184 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_coo + use psi_ext_util_mod + use psb_gpu_env_mod +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ +#ifdef HAVE_SPGPU + hacksize = max(1,psb_gpu_WarpSize()) +#else + hacksize = 1 +#endif + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + +#ifdef HAVE_SPGPU + call psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() +#else + + call psi_c_convert_ell_from_coo(a,b,info,hacksize=hacksize) + call a%set_host() +#endif + + else + call b%cp_to_coo(tmp,info) +#ifdef HAVE_SPGPU + call psi_c_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() +#else + + call psi_c_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) + call a%set_host() +#endif + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_c_count_ell_from_coo + +end subroutine psb_c_cp_elg_from_coo diff --git a/gpu/impl/psb_c_cp_elg_from_fmt.F90 b/gpu/impl/psb_c_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..e8be8a8d --- /dev/null +++ b/gpu/impl/psb_c_cp_elg_from_fmt.F90 @@ -0,0 +1,101 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_fmt +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_c_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cp_elg_from_fmt diff --git a/gpu/impl/psb_c_cp_hdiag_from_coo.F90 b/gpu/impl/psb_c_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..f0ec00ad --- /dev/null +++ b/gpu/impl/psb_c_cp_hdiag_from_coo.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_cp_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_c_hdiag_mat_mod +#endif + implicit none + + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_hdiag_from_coo diff --git a/gpu/impl/psb_c_cp_hlg_from_coo.F90 b/gpu/impl/psb_c_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..cf305592 --- /dev/null +++ b/gpu/impl/psb_c_cp_hlg_from_coo.F90 @@ -0,0 +1,198 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_coo +#else + use psb_c_hlg_mat_mod +#endif + implicit none + + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() +#ifdef HAVE_SPGPU + hksz = max(1,psb_gpu_WarpSize()) +#else + hksz = psi_get_hksz() +#endif + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_c_cp_hlg_from_coo diff --git a/gpu/impl/psb_c_cp_hlg_from_fmt.F90 b/gpu/impl/psb_c_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..559c501c --- /dev/null +++ b/gpu/impl/psb_c_cp_hlg_from_fmt.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_fmt +#else + use psb_c_hlg_mat_mod +#endif + implicit none + + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_c_hll_sparse_mat%cp_from_fmt(b,info) +#ifdef HAVE_SPGPU + if (info == 0) call a%to_gpu(info) +#endif + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_hlg_from_fmt diff --git a/gpu/impl/psb_c_cp_hybg_from_coo.F90 b/gpu/impl/psb_c_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..00a7d4ee --- /dev/null +++ b/gpu/impl/psb_c_cp_hybg_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cp_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_coo +#else + use psb_c_hybg_mat_mod +#endif + implicit none + + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_hybg_from_coo +#endif diff --git a/gpu/impl/psb_c_cp_hybg_from_fmt.F90 b/gpu/impl/psb_c_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..643abf99 --- /dev/null +++ b/gpu/impl/psb_c_cp_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_fmt +#else + use psb_c_hybg_mat_mod +#endif + implicit none + + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_c_cp_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_c_csrg_allocate_mnnz.F90 b/gpu/impl/psb_c_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..2183ee63 --- /dev/null +++ b/gpu/impl/psb_c_csrg_allocate_mnnz.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_allocate_mnnz +#else + use psb_c_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_allocate_mnnz diff --git a/gpu/impl/psb_c_csrg_csmm.F90 b/gpu/impl/psb_c_csrg_csmm.F90 new file mode 100644 index 00000000..cef5d288 --- /dev/null +++ b/gpu/impl/psb_c_csrg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_csmm +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_csrg_csmv +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_csrg_from_gpu +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() +#endif + +end subroutine psb_c_csrg_from_gpu diff --git a/gpu/impl/psb_c_csrg_inner_vect_sv.F90 b/gpu/impl/psb_c_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..39938752 --- /dev/null +++ b/gpu/impl/psb_c_csrg_inner_vect_sv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_inner_vect_sv +#else + use psb_c_csrg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_inner_vect_sv diff --git a/gpu/impl/psb_c_csrg_mold.F90 b/gpu/impl/psb_c_csrg_mold.F90 new file mode 100644 index 00000000..8b1b616a --- /dev/null +++ b/gpu/impl/psb_c_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_mold(a,b,info) + + use psb_base_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_mold + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_mold diff --git a/gpu/impl/psb_c_csrg_reallocate_nz.F90 b/gpu/impl/psb_c_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..e9db4128 --- /dev/null +++ b/gpu/impl/psb_c_csrg_reallocate_nz.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_reallocate_nz +#else + use psb_c_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_c_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_reallocate_nz diff --git a/gpu/impl/psb_c_csrg_scal.F90 b/gpu/impl/psb_c_csrg_scal.F90 new file mode 100644 index 00000000..f183a822 --- /dev/null +++ b/gpu/impl/psb_c_csrg_scal.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scal +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_c_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_scal diff --git a/gpu/impl/psb_c_csrg_scals.F90 b/gpu/impl/psb_c_csrg_scals.F90 new file mode 100644 index 00000000..13f0d707 --- /dev/null +++ b/gpu/impl/psb_c_csrg_scals.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scals +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_c_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_scals diff --git a/gpu/impl/psb_c_csrg_to_gpu.F90 b/gpu/impl/psb_c_csrg_to_gpu.F90 new file mode 100644 index 00000000..a04f1bab --- /dev/null +++ b/gpu/impl/psb_c_csrg_to_gpu.F90 @@ -0,0 +1,325 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_to_gpu +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_spk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if CUDA_SHORT_VERSION <= 10 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if +#endif + +end subroutine psb_c_csrg_to_gpu diff --git a/gpu/impl/psb_c_csrg_vect_mv.F90 b/gpu/impl/psb_c_csrg_vect_mv.F90 new file mode 100644 index 00000000..0feb03fd --- /dev/null +++ b/gpu/impl/psb_c_csrg_vect_mv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_vect_mv +#else + use psb_c_csrg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_csrg_vect_mv diff --git a/gpu/impl/psb_c_diag_csmv.F90 b/gpu/impl/psb_c_diag_csmv.F90 new file mode 100644 index 00000000..05ca102f --- /dev/null +++ b/gpu/impl/psb_c_diag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_csmv +#else + use psb_c_diag_mat_mod +#endif + implicit none + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_diag_mold + implicit none + class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_diag_mold diff --git a/gpu/impl/psb_c_diag_to_gpu.F90 b/gpu/impl/psb_c_diag_to_gpu.F90 new file mode 100644 index 00000000..a60fc741 --- /dev/null +++ b/gpu/impl/psb_c_diag_to_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_diag_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_to_gpu +#else + use psb_c_diag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_c_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d +#ifdef HAVE_SPGPU + type(diagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_float) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_float) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_c_diag_to_gpu diff --git a/gpu/impl/psb_c_diag_vect_mv.F90 b/gpu/impl/psb_c_diag_vect_mv.F90 new file mode 100644 index 00000000..e680a737 --- /dev/null +++ b/gpu/impl/psb_c_diag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_vect_mv +#else + use psb_c_diag_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_diag_vect_mv diff --git a/gpu/impl/psb_c_dnsg_mat_impl.F90 b/gpu/impl/psb_c_dnsg_mat_impl.F90 new file mode 100644 index 00000000..b70f383a --- /dev/null +++ b/gpu/impl/psb_c_dnsg_mat_impl.F90 @@ -0,0 +1,461 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_vect_mv +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + class(psb_c_dnsg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + complex(psb_spk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='c_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_dnsg_vect_mv + + +subroutine psb_c_dnsg_mold(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_mold +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end interface + + +subroutine psb_c_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_to_gpu +#else + use psb_c_dnsg_mat_mod +#endif + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='c_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_SPGPU + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_complex_float,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_dnsg_to_gpu + + + +subroutine psb_c_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_coo +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_c_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_c_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cp_dnsg_from_coo + +subroutine psb_c_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_fmt +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_c_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cp_dnsg_from_fmt + + + +subroutine psb_c_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_coo +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='c_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_mv_dnsg_from_coo + + +subroutine psb_c_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_fmt +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_c_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_c_mv_dnsg_from_fmt diff --git a/gpu/impl/psb_c_elg_allocate_mnnz.F90 b/gpu/impl/psb_c_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..ac9e654f --- /dev/null +++ b/gpu/impl/psb_c_elg_allocate_mnnz.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_allocate_mnnz +#else + use psb_c_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_float,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize +#else + ld = m +#endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_allocate_mnnz diff --git a/gpu/impl/psb_c_elg_asb.f90 b/gpu/impl/psb_c_elg_asb.f90 new file mode 100644 index 00000000..f2a8c641 --- /dev/null +++ b/gpu/impl/psb_c_elg_asb.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_asb(a) + + use psb_base_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_asb + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_asb diff --git a/gpu/impl/psb_c_elg_csmm.F90 b/gpu/impl/psb_c_elg_csmm.F90 new file mode 100644 index 00000000..5d355d88 --- /dev/null +++ b/gpu/impl/psb_c_elg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csmm +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_elg_csmv +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_elg_csput_a +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_csput_a + + + +subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding +#ifdef HAVE_SPGPU + use elldev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csput_v + use psb_c_gpu_vect_mod +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_gpu) + select type (ja) + class is (psb_i_vect_gpu) + select type (val) + class is (psb_c_vect_gpu) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceFloatComplex(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_c_elg_csput_v diff --git a/gpu/impl/psb_c_elg_from_gpu.F90 b/gpu/impl/psb_c_elg_from_gpu.F90 new file mode 100644 index 00000000..eda65380 --- /dev/null +++ b/gpu/impl/psb_c_elg_from_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_from_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_from_gpu +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_c_elg_from_gpu diff --git a/gpu/impl/psb_c_elg_inner_vect_sv.F90 b/gpu/impl/psb_c_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..97f0f7ff --- /dev/null +++ b/gpu/impl/psb_c_elg_inner_vect_sv.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_inner_vect_sv +#else + use psb_c_elg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_elg_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_inner_vect_sv diff --git a/gpu/impl/psb_c_elg_mold.F90 b/gpu/impl/psb_c_elg_mold.F90 new file mode 100644 index 00000000..17cd2ce2 --- /dev/null +++ b/gpu/impl/psb_c_elg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_mold(a,b,info) + + use psb_base_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_mold + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_mold diff --git a/gpu/impl/psb_c_elg_reallocate_nz.F90 b/gpu/impl/psb_c_elg_reallocate_nz.F90 new file mode 100644 index 00000000..40d94d36 --- /dev/null +++ b/gpu/impl/psb_c_elg_reallocate_nz.F90 @@ -0,0 +1,79 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_reallocate_nz +#else + use psb_c_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_reallocate_nz diff --git a/gpu/impl/psb_c_elg_scal.F90 b/gpu/impl/psb_c_elg_scal.F90 new file mode 100644 index 00000000..63d9907e --- /dev/null +++ b/gpu/impl/psb_c_elg_scal.F90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scal +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_scal diff --git a/gpu/impl/psb_c_elg_scals.F90 b/gpu/impl/psb_c_elg_scals.F90 new file mode 100644 index 00000000..b954e0a1 --- /dev/null +++ b/gpu/impl/psb_c_elg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scals +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_scals diff --git a/gpu/impl/psb_c_elg_to_gpu.F90 b/gpu/impl/psb_c_elg_to_gpu.F90 new file mode 100644 index 00000000..b967a59b --- /dev/null +++ b/gpu/impl/psb_c_elg_to_gpu.F90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_to_gpu +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_float,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_float,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_c_elg_to_gpu diff --git a/gpu/impl/psb_c_elg_trim.f90 b/gpu/impl/psb_c_elg_trim.f90 new file mode 100644 index 00000000..bc0c0696 --- /dev/null +++ b/gpu/impl/psb_c_elg_trim.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_trim(a) + + use psb_base_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_trim + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_trim diff --git a/gpu/impl/psb_c_elg_vect_mv.F90 b/gpu/impl/psb_c_elg_vect_mv.F90 new file mode 100644 index 00000000..ec6e5b50 --- /dev/null +++ b/gpu/impl/psb_c_elg_vect_mv.F90 @@ -0,0 +1,131 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_vect_mv +#else + use psb_c_elg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + if (a%is_dev()) call a%sync() + call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_vect_mv diff --git a/gpu/impl/psb_c_hdiag_csmv.F90 b/gpu/impl/psb_c_hdiag_csmv.F90 new file mode 100644 index 00000000..1ba58c6f --- /dev/null +++ b/gpu/impl/psb_c_hdiag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_csmv +#else + use psb_c_hdiag_mat_mod +#endif + implicit none + class(psb_c_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hdiag_mold + implicit none + class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hdiag_mold diff --git a/gpu/impl/psb_c_hdiag_to_gpu.F90 b/gpu/impl/psb_c_hdiag_to_gpu.F90 new file mode 100644 index 00000000..565babe0 --- /dev/null +++ b/gpu/impl/psb_c_hdiag_to_gpu.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hdiag_to_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_to_gpu +#else + use psb_c_hdiag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight +#ifdef HAVE_SPGPU + type(hdiagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +#endif + +end subroutine psb_c_hdiag_to_gpu diff --git a/gpu/impl/psb_c_hdiag_vect_mv.F90 b/gpu/impl/psb_c_hdiag_vect_mv.F90 new file mode 100644 index 00000000..a891a274 --- /dev/null +++ b/gpu/impl/psb_c_hdiag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_vect_mv +#else + use psb_c_hdiag_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hdiag_vect_mv diff --git a/gpu/impl/psb_c_hlg_allocate_mnnz.F90 b/gpu/impl/psb_c_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..27e5c0b6 --- /dev/null +++ b/gpu/impl/psb_c_hlg_allocate_mnnz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_allocate_mnnz +#else + use psb_c_hlg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(hlldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_hll_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_allocate_mnnz diff --git a/gpu/impl/psb_c_hlg_csmm.F90 b/gpu/impl/psb_c_hlg_csmm.F90 new file mode 100644 index 00000000..c33b2dde --- /dev/null +++ b/gpu/impl/psb_c_hlg_csmm.F90 @@ -0,0 +1,132 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_csmm +#else + use psb_c_hlg_mat_mod +#endif + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hlg_csmv +#else + use psb_c_hlg_mat_mod +#endif + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hlg_from_gpu +#else + use psb_c_hlg_mat_mod +#endif + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + +#ifdef HAVE_SPGPU + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_c_hlg_from_gpu diff --git a/gpu/impl/psb_c_hlg_inner_vect_sv.F90 b/gpu/impl/psb_c_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..0955d8a1 --- /dev/null +++ b/gpu/impl/psb_c_hlg_inner_vect_sv.F90 @@ -0,0 +1,81 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_inner_vect_sv +#else + use psb_c_hlg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_base_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_inner_vect_sv diff --git a/gpu/impl/psb_c_hlg_mold.F90 b/gpu/impl/psb_c_hlg_mold.F90 new file mode 100644 index 00000000..321111f0 --- /dev/null +++ b/gpu/impl/psb_c_hlg_mold.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_mold(a,b,info) + + use psb_base_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_mold + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_hlg_mold diff --git a/gpu/impl/psb_c_hlg_reallocate_nz.F90 b/gpu/impl/psb_c_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..a27c3f55 --- /dev/null +++ b/gpu/impl/psb_c_hlg_reallocate_nz.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_reallocate_nz +#else + use psb_c_hlg_mat_mod +#endif + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_c_hll_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_reallocate_nz diff --git a/gpu/impl/psb_c_hlg_scal.F90 b/gpu/impl/psb_c_hlg_scal.F90 new file mode 100644 index 00000000..b2c9d30d --- /dev/null +++ b/gpu/impl/psb_c_hlg_scal.F90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scal +#else + use psb_c_hlg_mat_mod +#endif + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_scal diff --git a/gpu/impl/psb_c_hlg_scals.F90 b/gpu/impl/psb_c_hlg_scals.F90 new file mode 100644 index 00000000..af2efb19 --- /dev/null +++ b/gpu/impl/psb_c_hlg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scals +#else + use psb_c_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_hlg_scals diff --git a/gpu/impl/psb_c_hlg_to_gpu.F90 b/gpu/impl/psb_c_hlg_to_gpu.F90 new file mode 100644 index 00000000..0d37bc24 --- /dev/null +++ b/gpu/impl/psb_c_hlg_to_gpu.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_to_gpu +#else + use psb_c_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_complex_float,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_c_hlg_to_gpu diff --git a/gpu/impl/psb_c_hlg_vect_mv.F90 b/gpu/impl/psb_c_hlg_vect_mv.F90 new file mode 100644 index 00000000..bc4e2f56 --- /dev/null +++ b/gpu/impl/psb_c_hlg_vect_mv.F90 @@ -0,0 +1,129 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_vect_mv +#else + use psb_c_hlg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_vect_mv diff --git a/gpu/impl/psb_c_hybg_allocate_mnnz.F90 b/gpu/impl/psb_c_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..5cd57fa2 --- /dev/null +++ b/gpu/impl/psb_c_hybg_allocate_mnnz.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_allocate_mnnz +#else + use psb_c_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_allocate_mnnz +#endif diff --git a/gpu/impl/psb_c_hybg_csmm.F90 b/gpu/impl/psb_c_hybg_csmm.F90 new file mode 100644 index 00000000..7c8bb582 --- /dev/null +++ b/gpu/impl/psb_c_hybg_csmm.F90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_csmm +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hybg_csmv +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hybg_inner_vect_sv +#else + use psb_c_hybg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=czero)) then + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_inner_vect_sv +#endif diff --git a/gpu/impl/psb_c_hybg_mold.F90 b/gpu/impl/psb_c_hybg_mold.F90 new file mode 100644 index 00000000..54dd24c2 --- /dev/null +++ b/gpu/impl/psb_c_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_mold(a,b,info) + + use psb_base_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_mold + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_mold +#endif diff --git a/gpu/impl/psb_c_hybg_reallocate_nz.F90 b/gpu/impl/psb_c_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..3272b797 --- /dev/null +++ b/gpu/impl/psb_c_hybg_reallocate_nz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_reallocate_nz +#else + use psb_c_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_c_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_reallocate_nz +#endif diff --git a/gpu/impl/psb_c_hybg_scal.F90 b/gpu/impl/psb_c_hybg_scal.F90 new file mode 100644 index 00000000..1019f979 --- /dev/null +++ b/gpu/impl/psb_c_hybg_scal.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scal +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_scal +#endif diff --git a/gpu/impl/psb_c_hybg_scals.F90 b/gpu/impl/psb_c_hybg_scals.F90 new file mode 100644 index 00000000..1d09abbb --- /dev/null +++ b/gpu/impl/psb_c_hybg_scals.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scals +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_c_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_scals +#endif diff --git a/gpu/impl/psb_c_hybg_to_gpu.F90 b/gpu/impl/psb_c_hybg_to_gpu.F90 new file mode 100644 index 00000000..107efba9 --- /dev/null +++ b/gpu/impl/psb_c_hybg_to_gpu.F90 @@ -0,0 +1,154 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_to_gpu +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_spk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if +#endif + +end subroutine psb_c_hybg_to_gpu +#endif diff --git a/gpu/impl/psb_c_hybg_vect_mv.F90 b/gpu/impl/psb_c_hybg_vect_mv.F90 new file mode 100644 index 00000000..3ed0f7fd --- /dev/null +++ b/gpu/impl/psb_c_hybg_vect_mv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_vect_mv +#else + use psb_c_hybg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_vect_mv +#endif diff --git a/gpu/impl/psb_c_mv_csrg_from_coo.F90 b/gpu/impl/psb_c_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..d2533c2d --- /dev/null +++ b/gpu/impl/psb_c_mv_csrg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_coo +#else + use psb_c_csrg_mat_mod +#endif + implicit none + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_csrg_from_coo diff --git a/gpu/impl/psb_c_mv_csrg_from_fmt.F90 b/gpu/impl/psb_c_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..3e898e8f --- /dev/null +++ b/gpu/impl/psb_c_mv_csrg_from_fmt.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_fmt +#else + use psb_c_csrg_mat_mod +#endif + implicit none + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_c_mv_csrg_from_fmt diff --git a/gpu/impl/psb_c_mv_diag_from_coo.F90 b/gpu/impl/psb_c_mv_diag_from_coo.F90 new file mode 100644 index 00000000..34fe69b7 --- /dev/null +++ b/gpu/impl/psb_c_mv_diag_from_coo.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_mv_diag_from_coo +#else + use psb_c_diag_mat_mod +#endif + + implicit none + + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_diag_from_coo diff --git a/gpu/impl/psb_c_mv_elg_from_coo.F90 b/gpu/impl/psb_c_mv_elg_from_coo.F90 new file mode 100644 index 00000000..acf7e28c --- /dev/null +++ b/gpu/impl/psb_c_mv_elg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_coo +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + + +end subroutine psb_c_mv_elg_from_coo diff --git a/gpu/impl/psb_c_mv_elg_from_fmt.F90 b/gpu/impl/psb_c_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..fb9e3cfe --- /dev/null +++ b/gpu/impl/psb_c_mv_elg_from_fmt.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_fmt +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_c_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_elg_from_fmt diff --git a/gpu/impl/psb_c_mv_hdiag_from_coo.F90 b/gpu/impl/psb_c_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..1d07bddb --- /dev/null +++ b/gpu/impl/psb_c_mv_hdiag_from_coo.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_mv_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_c_hdiag_mat_mod +#endif + + implicit none + + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_c_hdia_sparse_mat%mv_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hdiag_from_coo diff --git a/gpu/impl/psb_c_mv_hlg_from_coo.F90 b/gpu/impl/psb_c_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..0fa2d72d --- /dev/null +++ b/gpu/impl/psb_c_mv_hlg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_coo +#else + use psb_c_hlg_mat_mod +#endif + implicit none + + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_mv_hlg_from_coo diff --git a/gpu/impl/psb_c_mv_hlg_from_fmt.F90 b/gpu/impl/psb_c_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..0581c7d6 --- /dev/null +++ b/gpu/impl/psb_c_mv_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_fmt +#else + use psb_c_hlg_mat_mod +#endif + implicit none + + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_hlg_from_fmt diff --git a/gpu/impl/psb_c_mv_hybg_from_coo.F90 b/gpu/impl/psb_c_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..7aca6065 --- /dev/null +++ b/gpu/impl/psb_c_mv_hybg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_mv_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_coo +#else + use psb_c_hybg_mat_mod +#endif + implicit none + + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hybg_from_coo +#endif diff --git a/gpu/impl/psb_c_mv_hybg_from_fmt.F90 b/gpu/impl/psb_c_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..41581b85 --- /dev/null +++ b/gpu/impl/psb_c_mv_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_fmt +#else + use psb_c_hybg_mat_mod +#endif + implicit none + + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select +end subroutine psb_c_mv_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_d_cp_csrg_from_coo.F90 b/gpu/impl/psb_d_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..ec00007e --- /dev/null +++ b/gpu/impl/psb_d_cp_csrg_from_coo.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cp_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_coo +#else + use psb_d_csrg_mat_mod +#endif + implicit none + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_csrg_from_coo diff --git a/gpu/impl/psb_d_cp_csrg_from_fmt.F90 b/gpu/impl/psb_d_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..b3aabeed --- /dev/null +++ b/gpu/impl/psb_d_cp_csrg_from_fmt.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_fmt +#else + use psb_d_csrg_mat_mod +#endif + !use iso_c_binding + implicit none + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_d_cp_csrg_from_fmt diff --git a/gpu/impl/psb_d_cp_diag_from_coo.F90 b/gpu/impl/psb_d_cp_diag_from_coo.F90 new file mode 100644 index 00000000..06aff19d --- /dev/null +++ b/gpu/impl/psb_d_cp_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_cp_diag_from_coo +#else + use psb_d_diag_mat_mod +#endif + implicit none + + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_d_dia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_diag_from_coo diff --git a/gpu/impl/psb_d_cp_elg_from_coo.F90 b/gpu/impl/psb_d_cp_elg_from_coo.F90 new file mode 100644 index 00000000..381e4bfb --- /dev/null +++ b/gpu/impl/psb_d_cp_elg_from_coo.F90 @@ -0,0 +1,184 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_coo + use psi_ext_util_mod + use psb_gpu_env_mod +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ +#ifdef HAVE_SPGPU + hacksize = max(1,psb_gpu_WarpSize()) +#else + hacksize = 1 +#endif + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + +#ifdef HAVE_SPGPU + call psi_d_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() +#else + + call psi_d_convert_ell_from_coo(a,b,info,hacksize=hacksize) + call a%set_host() +#endif + + else + call b%cp_to_coo(tmp,info) +#ifdef HAVE_SPGPU + call psi_d_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() +#else + + call psi_d_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) + call a%set_host() +#endif + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_d_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_d_count_ell_from_coo + +end subroutine psb_d_cp_elg_from_coo diff --git a/gpu/impl/psb_d_cp_elg_from_fmt.F90 b/gpu/impl/psb_d_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..9a6b6d41 --- /dev/null +++ b/gpu/impl/psb_d_cp_elg_from_fmt.F90 @@ -0,0 +1,101 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_fmt +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_d_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cp_elg_from_fmt diff --git a/gpu/impl/psb_d_cp_hdiag_from_coo.F90 b/gpu/impl/psb_d_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..443452a1 --- /dev/null +++ b/gpu/impl/psb_d_cp_hdiag_from_coo.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_cp_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_d_hdiag_mat_mod +#endif + implicit none + + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_d_hdia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_hdiag_from_coo diff --git a/gpu/impl/psb_d_cp_hlg_from_coo.F90 b/gpu/impl/psb_d_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..02855fef --- /dev/null +++ b/gpu/impl/psb_d_cp_hlg_from_coo.F90 @@ -0,0 +1,198 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_coo +#else + use psb_d_hlg_mat_mod +#endif + implicit none + + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() +#ifdef HAVE_SPGPU + hksz = max(1,psb_gpu_WarpSize()) +#else + hksz = psi_get_hksz() +#endif + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_d_cp_hlg_from_coo diff --git a/gpu/impl/psb_d_cp_hlg_from_fmt.F90 b/gpu/impl/psb_d_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..133fbb32 --- /dev/null +++ b/gpu/impl/psb_d_cp_hlg_from_fmt.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_fmt +#else + use psb_d_hlg_mat_mod +#endif + implicit none + + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_d_hll_sparse_mat%cp_from_fmt(b,info) +#ifdef HAVE_SPGPU + if (info == 0) call a%to_gpu(info) +#endif + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_hlg_from_fmt diff --git a/gpu/impl/psb_d_cp_hybg_from_coo.F90 b/gpu/impl/psb_d_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..a74409cb --- /dev/null +++ b/gpu/impl/psb_d_cp_hybg_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cp_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_coo +#else + use psb_d_hybg_mat_mod +#endif + implicit none + + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_hybg_from_coo +#endif diff --git a/gpu/impl/psb_d_cp_hybg_from_fmt.F90 b/gpu/impl/psb_d_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..91d59060 --- /dev/null +++ b/gpu/impl/psb_d_cp_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_fmt +#else + use psb_d_hybg_mat_mod +#endif + implicit none + + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_d_cp_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_d_csrg_allocate_mnnz.F90 b/gpu/impl/psb_d_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..7d2d4470 --- /dev/null +++ b/gpu/impl/psb_d_csrg_allocate_mnnz.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_allocate_mnnz +#else + use psb_d_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_allocate_mnnz diff --git a/gpu/impl/psb_d_csrg_csmm.F90 b/gpu/impl/psb_d_csrg_csmm.F90 new file mode 100644 index 00000000..59c8343e --- /dev/null +++ b/gpu/impl/psb_d_csrg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_csmm +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_csrg_csmv +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_csrg_from_gpu +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() +#endif + +end subroutine psb_d_csrg_from_gpu diff --git a/gpu/impl/psb_d_csrg_inner_vect_sv.F90 b/gpu/impl/psb_d_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..016d63d6 --- /dev/null +++ b/gpu/impl/psb_d_csrg_inner_vect_sv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_inner_vect_sv +#else + use psb_d_csrg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_inner_vect_sv diff --git a/gpu/impl/psb_d_csrg_mold.F90 b/gpu/impl/psb_d_csrg_mold.F90 new file mode 100644 index 00000000..d7288868 --- /dev/null +++ b/gpu/impl/psb_d_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_mold(a,b,info) + + use psb_base_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_mold + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_mold diff --git a/gpu/impl/psb_d_csrg_reallocate_nz.F90 b/gpu/impl/psb_d_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..083091f5 --- /dev/null +++ b/gpu/impl/psb_d_csrg_reallocate_nz.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_reallocate_nz +#else + use psb_d_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_d_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_reallocate_nz diff --git a/gpu/impl/psb_d_csrg_scal.F90 b/gpu/impl/psb_d_csrg_scal.F90 new file mode 100644 index 00000000..60dbaecd --- /dev/null +++ b/gpu/impl/psb_d_csrg_scal.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scal +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_d_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_scal diff --git a/gpu/impl/psb_d_csrg_scals.F90 b/gpu/impl/psb_d_csrg_scals.F90 new file mode 100644 index 00000000..6d4a1f40 --- /dev/null +++ b/gpu/impl/psb_d_csrg_scals.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scals +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_d_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_scals diff --git a/gpu/impl/psb_d_csrg_to_gpu.F90 b/gpu/impl/psb_d_csrg_to_gpu.F90 new file mode 100644 index 00000000..eb5d3942 --- /dev/null +++ b/gpu/impl/psb_d_csrg_to_gpu.F90 @@ -0,0 +1,325 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_to_gpu +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_dpk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if CUDA_SHORT_VERSION <= 10 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if +#endif + +end subroutine psb_d_csrg_to_gpu diff --git a/gpu/impl/psb_d_csrg_vect_mv.F90 b/gpu/impl/psb_d_csrg_vect_mv.F90 new file mode 100644 index 00000000..f7124bbb --- /dev/null +++ b/gpu/impl/psb_d_csrg_vect_mv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_vect_mv +#else + use psb_d_csrg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_csrg_vect_mv diff --git a/gpu/impl/psb_d_diag_csmv.F90 b/gpu/impl/psb_d_diag_csmv.F90 new file mode 100644 index 00000000..af9ad2db --- /dev/null +++ b/gpu/impl/psb_d_diag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_csmv +#else + use psb_d_diag_mat_mod +#endif + implicit none + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_diag_mold + implicit none + class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_diag_mold diff --git a/gpu/impl/psb_d_diag_to_gpu.F90 b/gpu/impl/psb_d_diag_to_gpu.F90 new file mode 100644 index 00000000..de244124 --- /dev/null +++ b/gpu/impl/psb_d_diag_to_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_diag_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_to_gpu +#else + use psb_d_diag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_d_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d +#ifdef HAVE_SPGPU + type(diagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_double) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_double) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_d_diag_to_gpu diff --git a/gpu/impl/psb_d_diag_vect_mv.F90 b/gpu/impl/psb_d_diag_vect_mv.F90 new file mode 100644 index 00000000..3f2f5ac6 --- /dev/null +++ b/gpu/impl/psb_d_diag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_vect_mv +#else + use psb_d_diag_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_diag_vect_mv diff --git a/gpu/impl/psb_d_dnsg_mat_impl.F90 b/gpu/impl/psb_d_dnsg_mat_impl.F90 new file mode 100644 index 00000000..a7915898 --- /dev/null +++ b/gpu/impl/psb_d_dnsg_mat_impl.F90 @@ -0,0 +1,461 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_vect_mv +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + class(psb_d_dnsg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + real(psb_dpk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='d_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_dnsg_vect_mv + + +subroutine psb_d_dnsg_mold(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_mold +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end interface + + +subroutine psb_d_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_to_gpu +#else + use psb_d_dnsg_mat_mod +#endif + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='d_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_SPGPU + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_double,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_dnsg_to_gpu + + + +subroutine psb_d_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_coo +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_d_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_d_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cp_dnsg_from_coo + +subroutine psb_d_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_fmt +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_d_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cp_dnsg_from_fmt + + + +subroutine psb_d_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_coo +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='d_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_mv_dnsg_from_coo + + +subroutine psb_d_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_fmt +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_d_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_d_mv_dnsg_from_fmt diff --git a/gpu/impl/psb_d_elg_allocate_mnnz.F90 b/gpu/impl/psb_d_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..105f5617 --- /dev/null +++ b/gpu/impl/psb_d_elg_allocate_mnnz.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_allocate_mnnz +#else + use psb_d_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_double,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize +#else + ld = m +#endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_allocate_mnnz diff --git a/gpu/impl/psb_d_elg_asb.f90 b/gpu/impl/psb_d_elg_asb.f90 new file mode 100644 index 00000000..f80537ef --- /dev/null +++ b/gpu/impl/psb_d_elg_asb.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_asb(a) + + use psb_base_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_asb + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_asb diff --git a/gpu/impl/psb_d_elg_csmm.F90 b/gpu/impl/psb_d_elg_csmm.F90 new file mode 100644 index 00000000..add9c3b2 --- /dev/null +++ b/gpu/impl/psb_d_elg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csmm +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_elg_csmv +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_elg_csput_a +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_csput_a + + + +subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding +#ifdef HAVE_SPGPU + use elldev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csput_v + use psb_d_gpu_vect_mod +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_gpu) + select type (ja) + class is (psb_i_vect_gpu) + select type (val) + class is (psb_d_vect_gpu) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceDouble(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_d_elg_csput_v diff --git a/gpu/impl/psb_d_elg_from_gpu.F90 b/gpu/impl/psb_d_elg_from_gpu.F90 new file mode 100644 index 00000000..c1da9584 --- /dev/null +++ b/gpu/impl/psb_d_elg_from_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_from_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_from_gpu +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_d_elg_from_gpu diff --git a/gpu/impl/psb_d_elg_inner_vect_sv.F90 b/gpu/impl/psb_d_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..333946bf --- /dev/null +++ b/gpu/impl/psb_d_elg_inner_vect_sv.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_inner_vect_sv +#else + use psb_d_elg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_elg_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_d_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_inner_vect_sv diff --git a/gpu/impl/psb_d_elg_mold.F90 b/gpu/impl/psb_d_elg_mold.F90 new file mode 100644 index 00000000..3fd6d071 --- /dev/null +++ b/gpu/impl/psb_d_elg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_mold(a,b,info) + + use psb_base_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_mold + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_mold diff --git a/gpu/impl/psb_d_elg_reallocate_nz.F90 b/gpu/impl/psb_d_elg_reallocate_nz.F90 new file mode 100644 index 00000000..70b3705c --- /dev/null +++ b/gpu/impl/psb_d_elg_reallocate_nz.F90 @@ -0,0 +1,79 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_reallocate_nz +#else + use psb_d_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_reallocate_nz diff --git a/gpu/impl/psb_d_elg_scal.F90 b/gpu/impl/psb_d_elg_scal.F90 new file mode 100644 index 00000000..53ab82d7 --- /dev/null +++ b/gpu/impl/psb_d_elg_scal.F90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scal +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_scal diff --git a/gpu/impl/psb_d_elg_scals.F90 b/gpu/impl/psb_d_elg_scals.F90 new file mode 100644 index 00000000..f85780ce --- /dev/null +++ b/gpu/impl/psb_d_elg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scals +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_scals diff --git a/gpu/impl/psb_d_elg_to_gpu.F90 b/gpu/impl/psb_d_elg_to_gpu.F90 new file mode 100644 index 00000000..28e61606 --- /dev/null +++ b/gpu/impl/psb_d_elg_to_gpu.F90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_to_gpu +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_double,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_double,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_d_elg_to_gpu diff --git a/gpu/impl/psb_d_elg_trim.f90 b/gpu/impl/psb_d_elg_trim.f90 new file mode 100644 index 00000000..d2a2047c --- /dev/null +++ b/gpu/impl/psb_d_elg_trim.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_trim(a) + + use psb_base_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_trim + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_trim diff --git a/gpu/impl/psb_d_elg_vect_mv.F90 b/gpu/impl/psb_d_elg_vect_mv.F90 new file mode 100644 index 00000000..e46f84da --- /dev/null +++ b/gpu/impl/psb_d_elg_vect_mv.F90 @@ -0,0 +1,131 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_vect_mv +#else + use psb_d_elg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_vect_mv diff --git a/gpu/impl/psb_d_hdiag_csmv.F90 b/gpu/impl/psb_d_hdiag_csmv.F90 new file mode 100644 index 00000000..6f6bcedf --- /dev/null +++ b/gpu/impl/psb_d_hdiag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_csmv +#else + use psb_d_hdiag_mat_mod +#endif + implicit none + class(psb_d_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hdiag_mold + implicit none + class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hdiag_mold diff --git a/gpu/impl/psb_d_hdiag_to_gpu.F90 b/gpu/impl/psb_d_hdiag_to_gpu.F90 new file mode 100644 index 00000000..fb013586 --- /dev/null +++ b/gpu/impl/psb_d_hdiag_to_gpu.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hdiag_to_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_to_gpu +#else + use psb_d_hdiag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight +#ifdef HAVE_SPGPU + type(hdiagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +#endif + +end subroutine psb_d_hdiag_to_gpu diff --git a/gpu/impl/psb_d_hdiag_vect_mv.F90 b/gpu/impl/psb_d_hdiag_vect_mv.F90 new file mode 100644 index 00000000..db7ec9c6 --- /dev/null +++ b/gpu/impl/psb_d_hdiag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_vect_mv +#else + use psb_d_hdiag_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hdiag_vect_mv diff --git a/gpu/impl/psb_d_hlg_allocate_mnnz.F90 b/gpu/impl/psb_d_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..6f327e81 --- /dev/null +++ b/gpu/impl/psb_d_hlg_allocate_mnnz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_allocate_mnnz +#else + use psb_d_hlg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(hlldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_hll_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_allocate_mnnz diff --git a/gpu/impl/psb_d_hlg_csmm.F90 b/gpu/impl/psb_d_hlg_csmm.F90 new file mode 100644 index 00000000..120f3e06 --- /dev/null +++ b/gpu/impl/psb_d_hlg_csmm.F90 @@ -0,0 +1,132 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_csmm +#else + use psb_d_hlg_mat_mod +#endif + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hlg_csmv +#else + use psb_d_hlg_mat_mod +#endif + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hlg_from_gpu +#else + use psb_d_hlg_mat_mod +#endif + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + +#ifdef HAVE_SPGPU + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_d_hlg_from_gpu diff --git a/gpu/impl/psb_d_hlg_inner_vect_sv.F90 b/gpu/impl/psb_d_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..0ad867a3 --- /dev/null +++ b/gpu/impl/psb_d_hlg_inner_vect_sv.F90 @@ -0,0 +1,81 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_inner_vect_sv +#else + use psb_d_hlg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_base_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_d_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_inner_vect_sv diff --git a/gpu/impl/psb_d_hlg_mold.F90 b/gpu/impl/psb_d_hlg_mold.F90 new file mode 100644 index 00000000..3ce9f33a --- /dev/null +++ b/gpu/impl/psb_d_hlg_mold.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_mold(a,b,info) + + use psb_base_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_mold + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_hlg_mold diff --git a/gpu/impl/psb_d_hlg_reallocate_nz.F90 b/gpu/impl/psb_d_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..c9fa4771 --- /dev/null +++ b/gpu/impl/psb_d_hlg_reallocate_nz.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_reallocate_nz +#else + use psb_d_hlg_mat_mod +#endif + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_d_hll_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_reallocate_nz diff --git a/gpu/impl/psb_d_hlg_scal.F90 b/gpu/impl/psb_d_hlg_scal.F90 new file mode 100644 index 00000000..b487303d --- /dev/null +++ b/gpu/impl/psb_d_hlg_scal.F90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scal +#else + use psb_d_hlg_mat_mod +#endif + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_scal diff --git a/gpu/impl/psb_d_hlg_scals.F90 b/gpu/impl/psb_d_hlg_scals.F90 new file mode 100644 index 00000000..e3f676e9 --- /dev/null +++ b/gpu/impl/psb_d_hlg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scals +#else + use psb_d_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_hlg_scals diff --git a/gpu/impl/psb_d_hlg_to_gpu.F90 b/gpu/impl/psb_d_hlg_to_gpu.F90 new file mode 100644 index 00000000..5e3b3558 --- /dev/null +++ b/gpu/impl/psb_d_hlg_to_gpu.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_to_gpu +#else + use psb_d_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_double,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_d_hlg_to_gpu diff --git a/gpu/impl/psb_d_hlg_vect_mv.F90 b/gpu/impl/psb_d_hlg_vect_mv.F90 new file mode 100644 index 00000000..cd5e95e5 --- /dev/null +++ b/gpu/impl/psb_d_hlg_vect_mv.F90 @@ -0,0 +1,129 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_vect_mv +#else + use psb_d_hlg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_vect_mv diff --git a/gpu/impl/psb_d_hybg_allocate_mnnz.F90 b/gpu/impl/psb_d_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..1565a719 --- /dev/null +++ b/gpu/impl/psb_d_hybg_allocate_mnnz.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_allocate_mnnz +#else + use psb_d_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_allocate_mnnz +#endif diff --git a/gpu/impl/psb_d_hybg_csmm.F90 b/gpu/impl/psb_d_hybg_csmm.F90 new file mode 100644 index 00000000..abc0e0c2 --- /dev/null +++ b/gpu/impl/psb_d_hybg_csmm.F90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_csmm +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hybg_csmv +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hybg_inner_vect_sv +#else + use psb_d_hybg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_inner_vect_sv +#endif diff --git a/gpu/impl/psb_d_hybg_mold.F90 b/gpu/impl/psb_d_hybg_mold.F90 new file mode 100644 index 00000000..27390db0 --- /dev/null +++ b/gpu/impl/psb_d_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_mold(a,b,info) + + use psb_base_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_mold + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_mold +#endif diff --git a/gpu/impl/psb_d_hybg_reallocate_nz.F90 b/gpu/impl/psb_d_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..537101e9 --- /dev/null +++ b/gpu/impl/psb_d_hybg_reallocate_nz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_reallocate_nz +#else + use psb_d_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_d_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_reallocate_nz +#endif diff --git a/gpu/impl/psb_d_hybg_scal.F90 b/gpu/impl/psb_d_hybg_scal.F90 new file mode 100644 index 00000000..32ef2da0 --- /dev/null +++ b/gpu/impl/psb_d_hybg_scal.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scal +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_scal +#endif diff --git a/gpu/impl/psb_d_hybg_scals.F90 b/gpu/impl/psb_d_hybg_scals.F90 new file mode 100644 index 00000000..8c38328a --- /dev/null +++ b/gpu/impl/psb_d_hybg_scals.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scals +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_d_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_scals +#endif diff --git a/gpu/impl/psb_d_hybg_to_gpu.F90 b/gpu/impl/psb_d_hybg_to_gpu.F90 new file mode 100644 index 00000000..33bf55b8 --- /dev/null +++ b/gpu/impl/psb_d_hybg_to_gpu.F90 @@ -0,0 +1,154 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_to_gpu +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_dpk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if +#endif + +end subroutine psb_d_hybg_to_gpu +#endif diff --git a/gpu/impl/psb_d_hybg_vect_mv.F90 b/gpu/impl/psb_d_hybg_vect_mv.F90 new file mode 100644 index 00000000..d9653a48 --- /dev/null +++ b/gpu/impl/psb_d_hybg_vect_mv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_vect_mv +#else + use psb_d_hybg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_vect_mv +#endif diff --git a/gpu/impl/psb_d_mv_csrg_from_coo.F90 b/gpu/impl/psb_d_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..8c59e6d1 --- /dev/null +++ b/gpu/impl/psb_d_mv_csrg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_coo +#else + use psb_d_csrg_mat_mod +#endif + implicit none + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_csrg_from_coo diff --git a/gpu/impl/psb_d_mv_csrg_from_fmt.F90 b/gpu/impl/psb_d_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..30c133e4 --- /dev/null +++ b/gpu/impl/psb_d_mv_csrg_from_fmt.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_fmt +#else + use psb_d_csrg_mat_mod +#endif + implicit none + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_d_mv_csrg_from_fmt diff --git a/gpu/impl/psb_d_mv_diag_from_coo.F90 b/gpu/impl/psb_d_mv_diag_from_coo.F90 new file mode 100644 index 00000000..f37a5523 --- /dev/null +++ b/gpu/impl/psb_d_mv_diag_from_coo.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_mv_diag_from_coo +#else + use psb_d_diag_mat_mod +#endif + + implicit none + + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_diag_from_coo diff --git a/gpu/impl/psb_d_mv_elg_from_coo.F90 b/gpu/impl/psb_d_mv_elg_from_coo.F90 new file mode 100644 index 00000000..73216cfa --- /dev/null +++ b/gpu/impl/psb_d_mv_elg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_coo +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + + +end subroutine psb_d_mv_elg_from_coo diff --git a/gpu/impl/psb_d_mv_elg_from_fmt.F90 b/gpu/impl/psb_d_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..5038c50e --- /dev/null +++ b/gpu/impl/psb_d_mv_elg_from_fmt.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_fmt +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_d_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_elg_from_fmt diff --git a/gpu/impl/psb_d_mv_hdiag_from_coo.F90 b/gpu/impl/psb_d_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..ee0e983f --- /dev/null +++ b/gpu/impl/psb_d_mv_hdiag_from_coo.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_mv_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_d_hdiag_mat_mod +#endif + + implicit none + + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_d_hdia_sparse_mat%mv_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hdiag_from_coo diff --git a/gpu/impl/psb_d_mv_hlg_from_coo.F90 b/gpu/impl/psb_d_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..fe030415 --- /dev/null +++ b/gpu/impl/psb_d_mv_hlg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_coo +#else + use psb_d_hlg_mat_mod +#endif + implicit none + + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_mv_hlg_from_coo diff --git a/gpu/impl/psb_d_mv_hlg_from_fmt.F90 b/gpu/impl/psb_d_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..e538b017 --- /dev/null +++ b/gpu/impl/psb_d_mv_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_fmt +#else + use psb_d_hlg_mat_mod +#endif + implicit none + + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hlg_from_fmt diff --git a/gpu/impl/psb_d_mv_hybg_from_coo.F90 b/gpu/impl/psb_d_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..4fe76c72 --- /dev/null +++ b/gpu/impl/psb_d_mv_hybg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_mv_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_coo +#else + use psb_d_hybg_mat_mod +#endif + implicit none + + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hybg_from_coo +#endif diff --git a/gpu/impl/psb_d_mv_hybg_from_fmt.F90 b/gpu/impl/psb_d_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..454533d0 --- /dev/null +++ b/gpu/impl/psb_d_mv_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_fmt +#else + use psb_d_hybg_mat_mod +#endif + implicit none + + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select +end subroutine psb_d_mv_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_s_cp_csrg_from_coo.F90 b/gpu/impl/psb_s_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..4a714d41 --- /dev/null +++ b/gpu/impl/psb_s_cp_csrg_from_coo.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cp_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_coo +#else + use psb_s_csrg_mat_mod +#endif + implicit none + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_csrg_from_coo diff --git a/gpu/impl/psb_s_cp_csrg_from_fmt.F90 b/gpu/impl/psb_s_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..962a8c9d --- /dev/null +++ b/gpu/impl/psb_s_cp_csrg_from_fmt.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_fmt +#else + use psb_s_csrg_mat_mod +#endif + !use iso_c_binding + implicit none + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_s_cp_csrg_from_fmt diff --git a/gpu/impl/psb_s_cp_diag_from_coo.F90 b/gpu/impl/psb_s_cp_diag_from_coo.F90 new file mode 100644 index 00000000..6b105ef2 --- /dev/null +++ b/gpu/impl/psb_s_cp_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_cp_diag_from_coo +#else + use psb_s_diag_mat_mod +#endif + implicit none + + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_s_dia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_diag_from_coo diff --git a/gpu/impl/psb_s_cp_elg_from_coo.F90 b/gpu/impl/psb_s_cp_elg_from_coo.F90 new file mode 100644 index 00000000..af8c7d28 --- /dev/null +++ b/gpu/impl/psb_s_cp_elg_from_coo.F90 @@ -0,0 +1,184 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_coo + use psi_ext_util_mod + use psb_gpu_env_mod +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ +#ifdef HAVE_SPGPU + hacksize = max(1,psb_gpu_WarpSize()) +#else + hacksize = 1 +#endif + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + +#ifdef HAVE_SPGPU + call psi_s_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() +#else + + call psi_s_convert_ell_from_coo(a,b,info,hacksize=hacksize) + call a%set_host() +#endif + + else + call b%cp_to_coo(tmp,info) +#ifdef HAVE_SPGPU + call psi_s_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() +#else + + call psi_s_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) + call a%set_host() +#endif + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_s_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_s_count_ell_from_coo + +end subroutine psb_s_cp_elg_from_coo diff --git a/gpu/impl/psb_s_cp_elg_from_fmt.F90 b/gpu/impl/psb_s_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..c3d973e1 --- /dev/null +++ b/gpu/impl/psb_s_cp_elg_from_fmt.F90 @@ -0,0 +1,101 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_fmt +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_s_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cp_elg_from_fmt diff --git a/gpu/impl/psb_s_cp_hdiag_from_coo.F90 b/gpu/impl/psb_s_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..0509706d --- /dev/null +++ b/gpu/impl/psb_s_cp_hdiag_from_coo.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_cp_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_s_hdiag_mat_mod +#endif + implicit none + + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_s_hdia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_hdiag_from_coo diff --git a/gpu/impl/psb_s_cp_hlg_from_coo.F90 b/gpu/impl/psb_s_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..5988c8dd --- /dev/null +++ b/gpu/impl/psb_s_cp_hlg_from_coo.F90 @@ -0,0 +1,198 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_coo +#else + use psb_s_hlg_mat_mod +#endif + implicit none + + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() +#ifdef HAVE_SPGPU + hksz = max(1,psb_gpu_WarpSize()) +#else + hksz = psi_get_hksz() +#endif + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_s_cp_hlg_from_coo diff --git a/gpu/impl/psb_s_cp_hlg_from_fmt.F90 b/gpu/impl/psb_s_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..41c20866 --- /dev/null +++ b/gpu/impl/psb_s_cp_hlg_from_fmt.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_fmt +#else + use psb_s_hlg_mat_mod +#endif + implicit none + + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_s_hll_sparse_mat%cp_from_fmt(b,info) +#ifdef HAVE_SPGPU + if (info == 0) call a%to_gpu(info) +#endif + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_hlg_from_fmt diff --git a/gpu/impl/psb_s_cp_hybg_from_coo.F90 b/gpu/impl/psb_s_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..92dc4a68 --- /dev/null +++ b/gpu/impl/psb_s_cp_hybg_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cp_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_coo +#else + use psb_s_hybg_mat_mod +#endif + implicit none + + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_hybg_from_coo +#endif diff --git a/gpu/impl/psb_s_cp_hybg_from_fmt.F90 b/gpu/impl/psb_s_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..53143776 --- /dev/null +++ b/gpu/impl/psb_s_cp_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_fmt +#else + use psb_s_hybg_mat_mod +#endif + implicit none + + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_s_cp_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_s_csrg_allocate_mnnz.F90 b/gpu/impl/psb_s_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..e93452d2 --- /dev/null +++ b/gpu/impl/psb_s_csrg_allocate_mnnz.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_allocate_mnnz +#else + use psb_s_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_allocate_mnnz diff --git a/gpu/impl/psb_s_csrg_csmm.F90 b/gpu/impl/psb_s_csrg_csmm.F90 new file mode 100644 index 00000000..55087053 --- /dev/null +++ b/gpu/impl/psb_s_csrg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_csmm +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_csrg_csmv +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_csrg_from_gpu +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() +#endif + +end subroutine psb_s_csrg_from_gpu diff --git a/gpu/impl/psb_s_csrg_inner_vect_sv.F90 b/gpu/impl/psb_s_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..133a6350 --- /dev/null +++ b/gpu/impl/psb_s_csrg_inner_vect_sv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_inner_vect_sv +#else + use psb_s_csrg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_inner_vect_sv diff --git a/gpu/impl/psb_s_csrg_mold.F90 b/gpu/impl/psb_s_csrg_mold.F90 new file mode 100644 index 00000000..6ac4cc3d --- /dev/null +++ b/gpu/impl/psb_s_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_mold(a,b,info) + + use psb_base_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_mold + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_mold diff --git a/gpu/impl/psb_s_csrg_reallocate_nz.F90 b/gpu/impl/psb_s_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..dd9a50d0 --- /dev/null +++ b/gpu/impl/psb_s_csrg_reallocate_nz.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_reallocate_nz +#else + use psb_s_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_s_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_reallocate_nz diff --git a/gpu/impl/psb_s_csrg_scal.F90 b/gpu/impl/psb_s_csrg_scal.F90 new file mode 100644 index 00000000..5e0fbcf0 --- /dev/null +++ b/gpu/impl/psb_s_csrg_scal.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scal +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_s_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_scal diff --git a/gpu/impl/psb_s_csrg_scals.F90 b/gpu/impl/psb_s_csrg_scals.F90 new file mode 100644 index 00000000..54b299a1 --- /dev/null +++ b/gpu/impl/psb_s_csrg_scals.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scals +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_s_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_scals diff --git a/gpu/impl/psb_s_csrg_to_gpu.F90 b/gpu/impl/psb_s_csrg_to_gpu.F90 new file mode 100644 index 00000000..f90ae4ea --- /dev/null +++ b/gpu/impl/psb_s_csrg_to_gpu.F90 @@ -0,0 +1,325 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_to_gpu +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_spk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if CUDA_SHORT_VERSION <= 10 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if +#endif + +end subroutine psb_s_csrg_to_gpu diff --git a/gpu/impl/psb_s_csrg_vect_mv.F90 b/gpu/impl/psb_s_csrg_vect_mv.F90 new file mode 100644 index 00000000..ff88bf89 --- /dev/null +++ b/gpu/impl/psb_s_csrg_vect_mv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_vect_mv +#else + use psb_s_csrg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_csrg_vect_mv diff --git a/gpu/impl/psb_s_diag_csmv.F90 b/gpu/impl/psb_s_diag_csmv.F90 new file mode 100644 index 00000000..4cf14d12 --- /dev/null +++ b/gpu/impl/psb_s_diag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_csmv +#else + use psb_s_diag_mat_mod +#endif + implicit none + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_diag_mold + implicit none + class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_diag_mold diff --git a/gpu/impl/psb_s_diag_to_gpu.F90 b/gpu/impl/psb_s_diag_to_gpu.F90 new file mode 100644 index 00000000..bb09b127 --- /dev/null +++ b/gpu/impl/psb_s_diag_to_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_diag_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_to_gpu +#else + use psb_s_diag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_s_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d +#ifdef HAVE_SPGPU + type(diagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_float) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_float) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_s_diag_to_gpu diff --git a/gpu/impl/psb_s_diag_vect_mv.F90 b/gpu/impl/psb_s_diag_vect_mv.F90 new file mode 100644 index 00000000..31976247 --- /dev/null +++ b/gpu/impl/psb_s_diag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_vect_mv +#else + use psb_s_diag_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_diag_vect_mv diff --git a/gpu/impl/psb_s_dnsg_mat_impl.F90 b/gpu/impl/psb_s_dnsg_mat_impl.F90 new file mode 100644 index 00000000..13c58985 --- /dev/null +++ b/gpu/impl/psb_s_dnsg_mat_impl.F90 @@ -0,0 +1,461 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_vect_mv +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + class(psb_s_dnsg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + real(psb_spk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='s_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_dnsg_vect_mv + + +subroutine psb_s_dnsg_mold(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_mold +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end interface + + +subroutine psb_s_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_to_gpu +#else + use psb_s_dnsg_mat_mod +#endif + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='s_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_SPGPU + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_float,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_dnsg_to_gpu + + + +subroutine psb_s_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_coo +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_s_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_s_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cp_dnsg_from_coo + +subroutine psb_s_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_fmt +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_s_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cp_dnsg_from_fmt + + + +subroutine psb_s_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_coo +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='s_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_mv_dnsg_from_coo + + +subroutine psb_s_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_fmt +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_s_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_s_mv_dnsg_from_fmt diff --git a/gpu/impl/psb_s_elg_allocate_mnnz.F90 b/gpu/impl/psb_s_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..f3b1d743 --- /dev/null +++ b/gpu/impl/psb_s_elg_allocate_mnnz.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_allocate_mnnz +#else + use psb_s_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_float,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize +#else + ld = m +#endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_allocate_mnnz diff --git a/gpu/impl/psb_s_elg_asb.f90 b/gpu/impl/psb_s_elg_asb.f90 new file mode 100644 index 00000000..190be710 --- /dev/null +++ b/gpu/impl/psb_s_elg_asb.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_asb(a) + + use psb_base_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_asb + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_asb diff --git a/gpu/impl/psb_s_elg_csmm.F90 b/gpu/impl/psb_s_elg_csmm.F90 new file mode 100644 index 00000000..8bda23e3 --- /dev/null +++ b/gpu/impl/psb_s_elg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csmm +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_elg_csmv +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_elg_csput_a +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_s_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_csput_a + + + +subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding +#ifdef HAVE_SPGPU + use elldev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csput_v + use psb_s_gpu_vect_mod +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_gpu) + select type (ja) + class is (psb_i_vect_gpu) + select type (val) + class is (psb_s_vect_gpu) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceFloat(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_s_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_s_elg_csput_v diff --git a/gpu/impl/psb_s_elg_from_gpu.F90 b/gpu/impl/psb_s_elg_from_gpu.F90 new file mode 100644 index 00000000..d043790d --- /dev/null +++ b/gpu/impl/psb_s_elg_from_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_from_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_from_gpu +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_s_elg_from_gpu diff --git a/gpu/impl/psb_s_elg_inner_vect_sv.F90 b/gpu/impl/psb_s_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..83c79cf3 --- /dev/null +++ b/gpu/impl/psb_s_elg_inner_vect_sv.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_inner_vect_sv +#else + use psb_s_elg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_elg_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_inner_vect_sv diff --git a/gpu/impl/psb_s_elg_mold.F90 b/gpu/impl/psb_s_elg_mold.F90 new file mode 100644 index 00000000..a481d605 --- /dev/null +++ b/gpu/impl/psb_s_elg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_mold(a,b,info) + + use psb_base_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_mold + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_mold diff --git a/gpu/impl/psb_s_elg_reallocate_nz.F90 b/gpu/impl/psb_s_elg_reallocate_nz.F90 new file mode 100644 index 00000000..22916852 --- /dev/null +++ b/gpu/impl/psb_s_elg_reallocate_nz.F90 @@ -0,0 +1,79 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_reallocate_nz +#else + use psb_s_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_reallocate_nz diff --git a/gpu/impl/psb_s_elg_scal.F90 b/gpu/impl/psb_s_elg_scal.F90 new file mode 100644 index 00000000..913ae47e --- /dev/null +++ b/gpu/impl/psb_s_elg_scal.F90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scal +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_scal diff --git a/gpu/impl/psb_s_elg_scals.F90 b/gpu/impl/psb_s_elg_scals.F90 new file mode 100644 index 00000000..8261fc94 --- /dev/null +++ b/gpu/impl/psb_s_elg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scals +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_scals diff --git a/gpu/impl/psb_s_elg_to_gpu.F90 b/gpu/impl/psb_s_elg_to_gpu.F90 new file mode 100644 index 00000000..bf86343b --- /dev/null +++ b/gpu/impl/psb_s_elg_to_gpu.F90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_to_gpu +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_float,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_float,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_s_elg_to_gpu diff --git a/gpu/impl/psb_s_elg_trim.f90 b/gpu/impl/psb_s_elg_trim.f90 new file mode 100644 index 00000000..f3bd3b2f --- /dev/null +++ b/gpu/impl/psb_s_elg_trim.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_trim(a) + + use psb_base_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_trim + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_trim diff --git a/gpu/impl/psb_s_elg_vect_mv.F90 b/gpu/impl/psb_s_elg_vect_mv.F90 new file mode 100644 index 00000000..f8d297d1 --- /dev/null +++ b/gpu/impl/psb_s_elg_vect_mv.F90 @@ -0,0 +1,131 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_vect_mv +#else + use psb_s_elg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + if (a%is_dev()) call a%sync() + call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_vect_mv diff --git a/gpu/impl/psb_s_hdiag_csmv.F90 b/gpu/impl/psb_s_hdiag_csmv.F90 new file mode 100644 index 00000000..3320901c --- /dev/null +++ b/gpu/impl/psb_s_hdiag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_csmv +#else + use psb_s_hdiag_mat_mod +#endif + implicit none + class(psb_s_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hdiag_mold + implicit none + class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hdiag_mold diff --git a/gpu/impl/psb_s_hdiag_to_gpu.F90 b/gpu/impl/psb_s_hdiag_to_gpu.F90 new file mode 100644 index 00000000..ade1c080 --- /dev/null +++ b/gpu/impl/psb_s_hdiag_to_gpu.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hdiag_to_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_to_gpu +#else + use psb_s_hdiag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight +#ifdef HAVE_SPGPU + type(hdiagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +#endif + +end subroutine psb_s_hdiag_to_gpu diff --git a/gpu/impl/psb_s_hdiag_vect_mv.F90 b/gpu/impl/psb_s_hdiag_vect_mv.F90 new file mode 100644 index 00000000..ac261e92 --- /dev/null +++ b/gpu/impl/psb_s_hdiag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_vect_mv +#else + use psb_s_hdiag_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hdiag_vect_mv diff --git a/gpu/impl/psb_s_hlg_allocate_mnnz.F90 b/gpu/impl/psb_s_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..c7e430f1 --- /dev/null +++ b/gpu/impl/psb_s_hlg_allocate_mnnz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_allocate_mnnz +#else + use psb_s_hlg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(hlldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_hll_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_allocate_mnnz diff --git a/gpu/impl/psb_s_hlg_csmm.F90 b/gpu/impl/psb_s_hlg_csmm.F90 new file mode 100644 index 00000000..126b17e6 --- /dev/null +++ b/gpu/impl/psb_s_hlg_csmm.F90 @@ -0,0 +1,132 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_csmm +#else + use psb_s_hlg_mat_mod +#endif + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hlg_csmv +#else + use psb_s_hlg_mat_mod +#endif + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hlg_from_gpu +#else + use psb_s_hlg_mat_mod +#endif + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + +#ifdef HAVE_SPGPU + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_s_hlg_from_gpu diff --git a/gpu/impl/psb_s_hlg_inner_vect_sv.F90 b/gpu/impl/psb_s_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..d545eb02 --- /dev/null +++ b/gpu/impl/psb_s_hlg_inner_vect_sv.F90 @@ -0,0 +1,81 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_inner_vect_sv +#else + use psb_s_hlg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_base_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_inner_vect_sv diff --git a/gpu/impl/psb_s_hlg_mold.F90 b/gpu/impl/psb_s_hlg_mold.F90 new file mode 100644 index 00000000..c5dc4774 --- /dev/null +++ b/gpu/impl/psb_s_hlg_mold.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_mold(a,b,info) + + use psb_base_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_mold + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_hlg_mold diff --git a/gpu/impl/psb_s_hlg_reallocate_nz.F90 b/gpu/impl/psb_s_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..19cd95df --- /dev/null +++ b/gpu/impl/psb_s_hlg_reallocate_nz.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_reallocate_nz +#else + use psb_s_hlg_mat_mod +#endif + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_s_hll_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_reallocate_nz diff --git a/gpu/impl/psb_s_hlg_scal.F90 b/gpu/impl/psb_s_hlg_scal.F90 new file mode 100644 index 00000000..cd389baa --- /dev/null +++ b/gpu/impl/psb_s_hlg_scal.F90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scal +#else + use psb_s_hlg_mat_mod +#endif + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_scal diff --git a/gpu/impl/psb_s_hlg_scals.F90 b/gpu/impl/psb_s_hlg_scals.F90 new file mode 100644 index 00000000..256fac3e --- /dev/null +++ b/gpu/impl/psb_s_hlg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scals +#else + use psb_s_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_hlg_scals diff --git a/gpu/impl/psb_s_hlg_to_gpu.F90 b/gpu/impl/psb_s_hlg_to_gpu.F90 new file mode 100644 index 00000000..139482c2 --- /dev/null +++ b/gpu/impl/psb_s_hlg_to_gpu.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_to_gpu +#else + use psb_s_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_float,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_s_hlg_to_gpu diff --git a/gpu/impl/psb_s_hlg_vect_mv.F90 b/gpu/impl/psb_s_hlg_vect_mv.F90 new file mode 100644 index 00000000..52f322aa --- /dev/null +++ b/gpu/impl/psb_s_hlg_vect_mv.F90 @@ -0,0 +1,129 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_vect_mv +#else + use psb_s_hlg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_vect_mv diff --git a/gpu/impl/psb_s_hybg_allocate_mnnz.F90 b/gpu/impl/psb_s_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..f2b79c77 --- /dev/null +++ b/gpu/impl/psb_s_hybg_allocate_mnnz.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_allocate_mnnz +#else + use psb_s_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_allocate_mnnz +#endif diff --git a/gpu/impl/psb_s_hybg_csmm.F90 b/gpu/impl/psb_s_hybg_csmm.F90 new file mode 100644 index 00000000..9de67633 --- /dev/null +++ b/gpu/impl/psb_s_hybg_csmm.F90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_csmm +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hybg_csmv +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hybg_inner_vect_sv +#else + use psb_s_hybg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=szero)) then + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_inner_vect_sv +#endif diff --git a/gpu/impl/psb_s_hybg_mold.F90 b/gpu/impl/psb_s_hybg_mold.F90 new file mode 100644 index 00000000..882990c0 --- /dev/null +++ b/gpu/impl/psb_s_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_mold(a,b,info) + + use psb_base_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_mold + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_mold +#endif diff --git a/gpu/impl/psb_s_hybg_reallocate_nz.F90 b/gpu/impl/psb_s_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..46079a92 --- /dev/null +++ b/gpu/impl/psb_s_hybg_reallocate_nz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_reallocate_nz +#else + use psb_s_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_s_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_reallocate_nz +#endif diff --git a/gpu/impl/psb_s_hybg_scal.F90 b/gpu/impl/psb_s_hybg_scal.F90 new file mode 100644 index 00000000..a55a8b2c --- /dev/null +++ b/gpu/impl/psb_s_hybg_scal.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scal +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_scal +#endif diff --git a/gpu/impl/psb_s_hybg_scals.F90 b/gpu/impl/psb_s_hybg_scals.F90 new file mode 100644 index 00000000..ae92166f --- /dev/null +++ b/gpu/impl/psb_s_hybg_scals.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scals +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_s_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_scals +#endif diff --git a/gpu/impl/psb_s_hybg_to_gpu.F90 b/gpu/impl/psb_s_hybg_to_gpu.F90 new file mode 100644 index 00000000..bfb9b261 --- /dev/null +++ b/gpu/impl/psb_s_hybg_to_gpu.F90 @@ -0,0 +1,154 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_to_gpu +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_spk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if +#endif + +end subroutine psb_s_hybg_to_gpu +#endif diff --git a/gpu/impl/psb_s_hybg_vect_mv.F90 b/gpu/impl/psb_s_hybg_vect_mv.F90 new file mode 100644 index 00000000..5fe102f6 --- /dev/null +++ b/gpu/impl/psb_s_hybg_vect_mv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_vect_mv +#else + use psb_s_hybg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_vect_mv +#endif diff --git a/gpu/impl/psb_s_mv_csrg_from_coo.F90 b/gpu/impl/psb_s_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..01c9db06 --- /dev/null +++ b/gpu/impl/psb_s_mv_csrg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_coo +#else + use psb_s_csrg_mat_mod +#endif + implicit none + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_csrg_from_coo diff --git a/gpu/impl/psb_s_mv_csrg_from_fmt.F90 b/gpu/impl/psb_s_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..0ac28af3 --- /dev/null +++ b/gpu/impl/psb_s_mv_csrg_from_fmt.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_fmt +#else + use psb_s_csrg_mat_mod +#endif + implicit none + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_s_mv_csrg_from_fmt diff --git a/gpu/impl/psb_s_mv_diag_from_coo.F90 b/gpu/impl/psb_s_mv_diag_from_coo.F90 new file mode 100644 index 00000000..f51607e5 --- /dev/null +++ b/gpu/impl/psb_s_mv_diag_from_coo.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_mv_diag_from_coo +#else + use psb_s_diag_mat_mod +#endif + + implicit none + + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_diag_from_coo diff --git a/gpu/impl/psb_s_mv_elg_from_coo.F90 b/gpu/impl/psb_s_mv_elg_from_coo.F90 new file mode 100644 index 00000000..ac153f6c --- /dev/null +++ b/gpu/impl/psb_s_mv_elg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_coo +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + + +end subroutine psb_s_mv_elg_from_coo diff --git a/gpu/impl/psb_s_mv_elg_from_fmt.F90 b/gpu/impl/psb_s_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..9238544c --- /dev/null +++ b/gpu/impl/psb_s_mv_elg_from_fmt.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_fmt +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_s_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_elg_from_fmt diff --git a/gpu/impl/psb_s_mv_hdiag_from_coo.F90 b/gpu/impl/psb_s_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..dcbcfe4d --- /dev/null +++ b/gpu/impl/psb_s_mv_hdiag_from_coo.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_mv_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_s_hdiag_mat_mod +#endif + + implicit none + + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_s_hdia_sparse_mat%mv_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hdiag_from_coo diff --git a/gpu/impl/psb_s_mv_hlg_from_coo.F90 b/gpu/impl/psb_s_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..dc72a135 --- /dev/null +++ b/gpu/impl/psb_s_mv_hlg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_coo +#else + use psb_s_hlg_mat_mod +#endif + implicit none + + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_mv_hlg_from_coo diff --git a/gpu/impl/psb_s_mv_hlg_from_fmt.F90 b/gpu/impl/psb_s_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..bbe42e4a --- /dev/null +++ b/gpu/impl/psb_s_mv_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_fmt +#else + use psb_s_hlg_mat_mod +#endif + implicit none + + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_hlg_from_fmt diff --git a/gpu/impl/psb_s_mv_hybg_from_coo.F90 b/gpu/impl/psb_s_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..7d3197a8 --- /dev/null +++ b/gpu/impl/psb_s_mv_hybg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_mv_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_coo +#else + use psb_s_hybg_mat_mod +#endif + implicit none + + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hybg_from_coo +#endif diff --git a/gpu/impl/psb_s_mv_hybg_from_fmt.F90 b/gpu/impl/psb_s_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..51d8a2e6 --- /dev/null +++ b/gpu/impl/psb_s_mv_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_fmt +#else + use psb_s_hybg_mat_mod +#endif + implicit none + + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select +end subroutine psb_s_mv_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_z_cp_csrg_from_coo.F90 b/gpu/impl/psb_z_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..c3b0eebd --- /dev/null +++ b/gpu/impl/psb_z_cp_csrg_from_coo.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cp_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_coo +#else + use psb_z_csrg_mat_mod +#endif + implicit none + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_csrg_from_coo diff --git a/gpu/impl/psb_z_cp_csrg_from_fmt.F90 b/gpu/impl/psb_z_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..218d6c7b --- /dev/null +++ b/gpu/impl/psb_z_cp_csrg_from_fmt.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_fmt +#else + use psb_z_csrg_mat_mod +#endif + !use iso_c_binding + implicit none + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_z_cp_csrg_from_fmt diff --git a/gpu/impl/psb_z_cp_diag_from_coo.F90 b/gpu/impl/psb_z_cp_diag_from_coo.F90 new file mode 100644 index 00000000..013e88cd --- /dev/null +++ b/gpu/impl/psb_z_cp_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_cp_diag_from_coo +#else + use psb_z_diag_mat_mod +#endif + implicit none + + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_z_dia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_diag_from_coo diff --git a/gpu/impl/psb_z_cp_elg_from_coo.F90 b/gpu/impl/psb_z_cp_elg_from_coo.F90 new file mode 100644 index 00000000..c9b61a99 --- /dev/null +++ b/gpu/impl/psb_z_cp_elg_from_coo.F90 @@ -0,0 +1,184 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_coo + use psi_ext_util_mod + use psb_gpu_env_mod +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ +#ifdef HAVE_SPGPU + hacksize = max(1,psb_gpu_WarpSize()) +#else + hacksize = 1 +#endif + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + +#ifdef HAVE_SPGPU + call psi_z_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() +#else + + call psi_z_convert_ell_from_coo(a,b,info,hacksize=hacksize) + call a%set_host() +#endif + + else + call b%cp_to_coo(tmp,info) +#ifdef HAVE_SPGPU + call psi_z_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() +#else + + call psi_z_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) + call a%set_host() +#endif + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_z_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_z_count_ell_from_coo + +end subroutine psb_z_cp_elg_from_coo diff --git a/gpu/impl/psb_z_cp_elg_from_fmt.F90 b/gpu/impl/psb_z_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..23468b8a --- /dev/null +++ b/gpu/impl/psb_z_cp_elg_from_fmt.F90 @@ -0,0 +1,101 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_fmt +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_z_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cp_elg_from_fmt diff --git a/gpu/impl/psb_z_cp_hdiag_from_coo.F90 b/gpu/impl/psb_z_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..b44c2854 --- /dev/null +++ b/gpu/impl/psb_z_cp_hdiag_from_coo.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_cp_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_z_hdiag_mat_mod +#endif + implicit none + + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_z_hdia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_hdiag_from_coo diff --git a/gpu/impl/psb_z_cp_hlg_from_coo.F90 b/gpu/impl/psb_z_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..51d0c8e6 --- /dev/null +++ b/gpu/impl/psb_z_cp_hlg_from_coo.F90 @@ -0,0 +1,198 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_coo +#else + use psb_z_hlg_mat_mod +#endif + implicit none + + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() +#ifdef HAVE_SPGPU + hksz = max(1,psb_gpu_WarpSize()) +#else + hksz = psi_get_hksz() +#endif + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_z_cp_hlg_from_coo diff --git a/gpu/impl/psb_z_cp_hlg_from_fmt.F90 b/gpu/impl/psb_z_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..a6dd5970 --- /dev/null +++ b/gpu/impl/psb_z_cp_hlg_from_fmt.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_fmt +#else + use psb_z_hlg_mat_mod +#endif + implicit none + + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_z_hll_sparse_mat%cp_from_fmt(b,info) +#ifdef HAVE_SPGPU + if (info == 0) call a%to_gpu(info) +#endif + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_hlg_from_fmt diff --git a/gpu/impl/psb_z_cp_hybg_from_coo.F90 b/gpu/impl/psb_z_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..ebb6f60a --- /dev/null +++ b/gpu/impl/psb_z_cp_hybg_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cp_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_coo +#else + use psb_z_hybg_mat_mod +#endif + implicit none + + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_hybg_from_coo +#endif diff --git a/gpu/impl/psb_z_cp_hybg_from_fmt.F90 b/gpu/impl/psb_z_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..82f2ac65 --- /dev/null +++ b/gpu/impl/psb_z_cp_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_fmt +#else + use psb_z_hybg_mat_mod +#endif + implicit none + + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_z_cp_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_z_csrg_allocate_mnnz.F90 b/gpu/impl/psb_z_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..8cb2ccb1 --- /dev/null +++ b/gpu/impl/psb_z_csrg_allocate_mnnz.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_allocate_mnnz +#else + use psb_z_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_allocate_mnnz diff --git a/gpu/impl/psb_z_csrg_csmm.F90 b/gpu/impl/psb_z_csrg_csmm.F90 new file mode 100644 index 00000000..eb8a4d7f --- /dev/null +++ b/gpu/impl/psb_z_csrg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_csmm +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_csrg_csmv +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_csrg_from_gpu +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() +#endif + +end subroutine psb_z_csrg_from_gpu diff --git a/gpu/impl/psb_z_csrg_inner_vect_sv.F90 b/gpu/impl/psb_z_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..75d6800b --- /dev/null +++ b/gpu/impl/psb_z_csrg_inner_vect_sv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_inner_vect_sv +#else + use psb_z_csrg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_inner_vect_sv diff --git a/gpu/impl/psb_z_csrg_mold.F90 b/gpu/impl/psb_z_csrg_mold.F90 new file mode 100644 index 00000000..e83deb3f --- /dev/null +++ b/gpu/impl/psb_z_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_mold(a,b,info) + + use psb_base_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_mold + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_mold diff --git a/gpu/impl/psb_z_csrg_reallocate_nz.F90 b/gpu/impl/psb_z_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..c2509c22 --- /dev/null +++ b/gpu/impl/psb_z_csrg_reallocate_nz.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_reallocate_nz +#else + use psb_z_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_z_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_reallocate_nz diff --git a/gpu/impl/psb_z_csrg_scal.F90 b/gpu/impl/psb_z_csrg_scal.F90 new file mode 100644 index 00000000..d8ab0ca3 --- /dev/null +++ b/gpu/impl/psb_z_csrg_scal.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scal +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_z_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_scal diff --git a/gpu/impl/psb_z_csrg_scals.F90 b/gpu/impl/psb_z_csrg_scals.F90 new file mode 100644 index 00000000..3d14998d --- /dev/null +++ b/gpu/impl/psb_z_csrg_scals.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scals +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_z_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_scals diff --git a/gpu/impl/psb_z_csrg_to_gpu.F90 b/gpu/impl/psb_z_csrg_to_gpu.F90 new file mode 100644 index 00000000..4548935d --- /dev/null +++ b/gpu/impl/psb_z_csrg_to_gpu.F90 @@ -0,0 +1,325 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_to_gpu +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_dpk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if CUDA_SHORT_VERSION <= 10 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if +#endif + +end subroutine psb_z_csrg_to_gpu diff --git a/gpu/impl/psb_z_csrg_vect_mv.F90 b/gpu/impl/psb_z_csrg_vect_mv.F90 new file mode 100644 index 00000000..0770d448 --- /dev/null +++ b/gpu/impl/psb_z_csrg_vect_mv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_vect_mv +#else + use psb_z_csrg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_csrg_vect_mv diff --git a/gpu/impl/psb_z_diag_csmv.F90 b/gpu/impl/psb_z_diag_csmv.F90 new file mode 100644 index 00000000..667e1a1f --- /dev/null +++ b/gpu/impl/psb_z_diag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_csmv +#else + use psb_z_diag_mat_mod +#endif + implicit none + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_diag_mold + implicit none + class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_diag_mold diff --git a/gpu/impl/psb_z_diag_to_gpu.F90 b/gpu/impl/psb_z_diag_to_gpu.F90 new file mode 100644 index 00000000..40913624 --- /dev/null +++ b/gpu/impl/psb_z_diag_to_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_diag_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_to_gpu +#else + use psb_z_diag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_z_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d +#ifdef HAVE_SPGPU + type(diagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_double) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_double) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_z_diag_to_gpu diff --git a/gpu/impl/psb_z_diag_vect_mv.F90 b/gpu/impl/psb_z_diag_vect_mv.F90 new file mode 100644 index 00000000..b8946491 --- /dev/null +++ b/gpu/impl/psb_z_diag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_vect_mv +#else + use psb_z_diag_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_diag_vect_mv diff --git a/gpu/impl/psb_z_dnsg_mat_impl.F90 b/gpu/impl/psb_z_dnsg_mat_impl.F90 new file mode 100644 index 00000000..407deaa2 --- /dev/null +++ b/gpu/impl/psb_z_dnsg_mat_impl.F90 @@ -0,0 +1,461 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_vect_mv +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + class(psb_z_dnsg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + complex(psb_dpk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='z_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_dnsg_vect_mv + + +subroutine psb_z_dnsg_mold(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_mold +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end interface + + +subroutine psb_z_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_to_gpu +#else + use psb_z_dnsg_mat_mod +#endif + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='z_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_SPGPU + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_complex_double,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_dnsg_to_gpu + + + +subroutine psb_z_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_coo +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_z_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_z_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cp_dnsg_from_coo + +subroutine psb_z_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_fmt +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_z_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cp_dnsg_from_fmt + + + +subroutine psb_z_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_coo +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='z_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_mv_dnsg_from_coo + + +subroutine psb_z_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_fmt +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_z_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_z_mv_dnsg_from_fmt diff --git a/gpu/impl/psb_z_elg_allocate_mnnz.F90 b/gpu/impl/psb_z_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..39d14dd2 --- /dev/null +++ b/gpu/impl/psb_z_elg_allocate_mnnz.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_allocate_mnnz +#else + use psb_z_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_double,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize +#else + ld = m +#endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_allocate_mnnz diff --git a/gpu/impl/psb_z_elg_asb.f90 b/gpu/impl/psb_z_elg_asb.f90 new file mode 100644 index 00000000..515f579a --- /dev/null +++ b/gpu/impl/psb_z_elg_asb.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_asb(a) + + use psb_base_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_asb + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_asb diff --git a/gpu/impl/psb_z_elg_csmm.F90 b/gpu/impl/psb_z_elg_csmm.F90 new file mode 100644 index 00000000..aa27419c --- /dev/null +++ b/gpu/impl/psb_z_elg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csmm +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_elg_csmv +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_elg_csput_a +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_csput_a + + + +subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding +#ifdef HAVE_SPGPU + use elldev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csput_v + use psb_z_gpu_vect_mod +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_gpu) + select type (ja) + class is (psb_i_vect_gpu) + select type (val) + class is (psb_z_vect_gpu) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceDoubleComplex(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_z_elg_csput_v diff --git a/gpu/impl/psb_z_elg_from_gpu.F90 b/gpu/impl/psb_z_elg_from_gpu.F90 new file mode 100644 index 00000000..e8670cd4 --- /dev/null +++ b/gpu/impl/psb_z_elg_from_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_from_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_from_gpu +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_z_elg_from_gpu diff --git a/gpu/impl/psb_z_elg_inner_vect_sv.F90 b/gpu/impl/psb_z_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..66d7eed8 --- /dev/null +++ b/gpu/impl/psb_z_elg_inner_vect_sv.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_inner_vect_sv +#else + use psb_z_elg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_elg_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_z_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_inner_vect_sv diff --git a/gpu/impl/psb_z_elg_mold.F90 b/gpu/impl/psb_z_elg_mold.F90 new file mode 100644 index 00000000..1a5ebe54 --- /dev/null +++ b/gpu/impl/psb_z_elg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_mold(a,b,info) + + use psb_base_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_mold + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_mold diff --git a/gpu/impl/psb_z_elg_reallocate_nz.F90 b/gpu/impl/psb_z_elg_reallocate_nz.F90 new file mode 100644 index 00000000..f6bc194f --- /dev/null +++ b/gpu/impl/psb_z_elg_reallocate_nz.F90 @@ -0,0 +1,79 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_reallocate_nz +#else + use psb_z_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_reallocate_nz diff --git a/gpu/impl/psb_z_elg_scal.F90 b/gpu/impl/psb_z_elg_scal.F90 new file mode 100644 index 00000000..eed9007a --- /dev/null +++ b/gpu/impl/psb_z_elg_scal.F90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scal +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_scal diff --git a/gpu/impl/psb_z_elg_scals.F90 b/gpu/impl/psb_z_elg_scals.F90 new file mode 100644 index 00000000..1e3f3682 --- /dev/null +++ b/gpu/impl/psb_z_elg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scals +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_scals diff --git a/gpu/impl/psb_z_elg_to_gpu.F90 b/gpu/impl/psb_z_elg_to_gpu.F90 new file mode 100644 index 00000000..71a5ec66 --- /dev/null +++ b/gpu/impl/psb_z_elg_to_gpu.F90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_to_gpu +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_double,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_double,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_z_elg_to_gpu diff --git a/gpu/impl/psb_z_elg_trim.f90 b/gpu/impl/psb_z_elg_trim.f90 new file mode 100644 index 00000000..9bd43312 --- /dev/null +++ b/gpu/impl/psb_z_elg_trim.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_trim(a) + + use psb_base_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_trim + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_trim diff --git a/gpu/impl/psb_z_elg_vect_mv.F90 b/gpu/impl/psb_z_elg_vect_mv.F90 new file mode 100644 index 00000000..5cd72e44 --- /dev/null +++ b/gpu/impl/psb_z_elg_vect_mv.F90 @@ -0,0 +1,131 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_vect_mv +#else + use psb_z_elg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_vect_mv diff --git a/gpu/impl/psb_z_hdiag_csmv.F90 b/gpu/impl/psb_z_hdiag_csmv.F90 new file mode 100644 index 00000000..baf730a2 --- /dev/null +++ b/gpu/impl/psb_z_hdiag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_csmv +#else + use psb_z_hdiag_mat_mod +#endif + implicit none + class(psb_z_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hdiag_mold + implicit none + class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hdiag_mold diff --git a/gpu/impl/psb_z_hdiag_to_gpu.F90 b/gpu/impl/psb_z_hdiag_to_gpu.F90 new file mode 100644 index 00000000..622a0141 --- /dev/null +++ b/gpu/impl/psb_z_hdiag_to_gpu.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hdiag_to_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_to_gpu +#else + use psb_z_hdiag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight +#ifdef HAVE_SPGPU + type(hdiagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +#endif + +end subroutine psb_z_hdiag_to_gpu diff --git a/gpu/impl/psb_z_hdiag_vect_mv.F90 b/gpu/impl/psb_z_hdiag_vect_mv.F90 new file mode 100644 index 00000000..3e1c859e --- /dev/null +++ b/gpu/impl/psb_z_hdiag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_vect_mv +#else + use psb_z_hdiag_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hdiag_vect_mv diff --git a/gpu/impl/psb_z_hlg_allocate_mnnz.F90 b/gpu/impl/psb_z_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..e3c05ec1 --- /dev/null +++ b/gpu/impl/psb_z_hlg_allocate_mnnz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_allocate_mnnz +#else + use psb_z_hlg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(hlldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_hll_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_allocate_mnnz diff --git a/gpu/impl/psb_z_hlg_csmm.F90 b/gpu/impl/psb_z_hlg_csmm.F90 new file mode 100644 index 00000000..3432c177 --- /dev/null +++ b/gpu/impl/psb_z_hlg_csmm.F90 @@ -0,0 +1,132 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_csmm +#else + use psb_z_hlg_mat_mod +#endif + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hlg_csmv +#else + use psb_z_hlg_mat_mod +#endif + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hlg_from_gpu +#else + use psb_z_hlg_mat_mod +#endif + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + +#ifdef HAVE_SPGPU + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_z_hlg_from_gpu diff --git a/gpu/impl/psb_z_hlg_inner_vect_sv.F90 b/gpu/impl/psb_z_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..5a7b1031 --- /dev/null +++ b/gpu/impl/psb_z_hlg_inner_vect_sv.F90 @@ -0,0 +1,81 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_inner_vect_sv +#else + use psb_z_hlg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_base_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_inner_vect_sv diff --git a/gpu/impl/psb_z_hlg_mold.F90 b/gpu/impl/psb_z_hlg_mold.F90 new file mode 100644 index 00000000..f9ff0c7a --- /dev/null +++ b/gpu/impl/psb_z_hlg_mold.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_mold(a,b,info) + + use psb_base_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_mold + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_hlg_mold diff --git a/gpu/impl/psb_z_hlg_reallocate_nz.F90 b/gpu/impl/psb_z_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..f3d50626 --- /dev/null +++ b/gpu/impl/psb_z_hlg_reallocate_nz.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_reallocate_nz +#else + use psb_z_hlg_mat_mod +#endif + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_z_hll_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_reallocate_nz diff --git a/gpu/impl/psb_z_hlg_scal.F90 b/gpu/impl/psb_z_hlg_scal.F90 new file mode 100644 index 00000000..8aa85500 --- /dev/null +++ b/gpu/impl/psb_z_hlg_scal.F90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scal +#else + use psb_z_hlg_mat_mod +#endif + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_scal diff --git a/gpu/impl/psb_z_hlg_scals.F90 b/gpu/impl/psb_z_hlg_scals.F90 new file mode 100644 index 00000000..d5689c06 --- /dev/null +++ b/gpu/impl/psb_z_hlg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scals +#else + use psb_z_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_hlg_scals diff --git a/gpu/impl/psb_z_hlg_to_gpu.F90 b/gpu/impl/psb_z_hlg_to_gpu.F90 new file mode 100644 index 00000000..d63aee9c --- /dev/null +++ b/gpu/impl/psb_z_hlg_to_gpu.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_to_gpu +#else + use psb_z_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_complex_double,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_z_hlg_to_gpu diff --git a/gpu/impl/psb_z_hlg_vect_mv.F90 b/gpu/impl/psb_z_hlg_vect_mv.F90 new file mode 100644 index 00000000..9efefc0a --- /dev/null +++ b/gpu/impl/psb_z_hlg_vect_mv.F90 @@ -0,0 +1,129 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_vect_mv +#else + use psb_z_hlg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_vect_mv diff --git a/gpu/impl/psb_z_hybg_allocate_mnnz.F90 b/gpu/impl/psb_z_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..2c38c536 --- /dev/null +++ b/gpu/impl/psb_z_hybg_allocate_mnnz.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_allocate_mnnz +#else + use psb_z_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_allocate_mnnz +#endif diff --git a/gpu/impl/psb_z_hybg_csmm.F90 b/gpu/impl/psb_z_hybg_csmm.F90 new file mode 100644 index 00000000..5ec9701b --- /dev/null +++ b/gpu/impl/psb_z_hybg_csmm.F90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_csmm +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hybg_csmv +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hybg_inner_vect_sv +#else + use psb_z_hybg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=zzero)) then + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_inner_vect_sv +#endif diff --git a/gpu/impl/psb_z_hybg_mold.F90 b/gpu/impl/psb_z_hybg_mold.F90 new file mode 100644 index 00000000..3a17dbd2 --- /dev/null +++ b/gpu/impl/psb_z_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_mold(a,b,info) + + use psb_base_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_mold + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_mold +#endif diff --git a/gpu/impl/psb_z_hybg_reallocate_nz.F90 b/gpu/impl/psb_z_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..79d81911 --- /dev/null +++ b/gpu/impl/psb_z_hybg_reallocate_nz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_reallocate_nz +#else + use psb_z_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_z_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_reallocate_nz +#endif diff --git a/gpu/impl/psb_z_hybg_scal.F90 b/gpu/impl/psb_z_hybg_scal.F90 new file mode 100644 index 00000000..c8179bf2 --- /dev/null +++ b/gpu/impl/psb_z_hybg_scal.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scal +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_scal +#endif diff --git a/gpu/impl/psb_z_hybg_scals.F90 b/gpu/impl/psb_z_hybg_scals.F90 new file mode 100644 index 00000000..3729412d --- /dev/null +++ b/gpu/impl/psb_z_hybg_scals.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scals +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_z_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_scals +#endif diff --git a/gpu/impl/psb_z_hybg_to_gpu.F90 b/gpu/impl/psb_z_hybg_to_gpu.F90 new file mode 100644 index 00000000..4a2a9b1c --- /dev/null +++ b/gpu/impl/psb_z_hybg_to_gpu.F90 @@ -0,0 +1,154 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_to_gpu +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_dpk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if +#endif + +end subroutine psb_z_hybg_to_gpu +#endif diff --git a/gpu/impl/psb_z_hybg_vect_mv.F90 b/gpu/impl/psb_z_hybg_vect_mv.F90 new file mode 100644 index 00000000..f3b6695e --- /dev/null +++ b/gpu/impl/psb_z_hybg_vect_mv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_vect_mv +#else + use psb_z_hybg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_vect_mv +#endif diff --git a/gpu/impl/psb_z_mv_csrg_from_coo.F90 b/gpu/impl/psb_z_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..21771b89 --- /dev/null +++ b/gpu/impl/psb_z_mv_csrg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_coo +#else + use psb_z_csrg_mat_mod +#endif + implicit none + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_csrg_from_coo diff --git a/gpu/impl/psb_z_mv_csrg_from_fmt.F90 b/gpu/impl/psb_z_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..31408214 --- /dev/null +++ b/gpu/impl/psb_z_mv_csrg_from_fmt.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_fmt +#else + use psb_z_csrg_mat_mod +#endif + implicit none + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_z_mv_csrg_from_fmt diff --git a/gpu/impl/psb_z_mv_diag_from_coo.F90 b/gpu/impl/psb_z_mv_diag_from_coo.F90 new file mode 100644 index 00000000..8872c890 --- /dev/null +++ b/gpu/impl/psb_z_mv_diag_from_coo.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_mv_diag_from_coo +#else + use psb_z_diag_mat_mod +#endif + + implicit none + + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_diag_from_coo diff --git a/gpu/impl/psb_z_mv_elg_from_coo.F90 b/gpu/impl/psb_z_mv_elg_from_coo.F90 new file mode 100644 index 00000000..2d78edc6 --- /dev/null +++ b/gpu/impl/psb_z_mv_elg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_coo +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + + +end subroutine psb_z_mv_elg_from_coo diff --git a/gpu/impl/psb_z_mv_elg_from_fmt.F90 b/gpu/impl/psb_z_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..3bf663b3 --- /dev/null +++ b/gpu/impl/psb_z_mv_elg_from_fmt.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_fmt +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_z_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_elg_from_fmt diff --git a/gpu/impl/psb_z_mv_hdiag_from_coo.F90 b/gpu/impl/psb_z_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..e1df9cc4 --- /dev/null +++ b/gpu/impl/psb_z_mv_hdiag_from_coo.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_mv_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_z_hdiag_mat_mod +#endif + + implicit none + + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_z_hdia_sparse_mat%mv_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hdiag_from_coo diff --git a/gpu/impl/psb_z_mv_hlg_from_coo.F90 b/gpu/impl/psb_z_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..ce037be2 --- /dev/null +++ b/gpu/impl/psb_z_mv_hlg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_coo +#else + use psb_z_hlg_mat_mod +#endif + implicit none + + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_mv_hlg_from_coo diff --git a/gpu/impl/psb_z_mv_hlg_from_fmt.F90 b/gpu/impl/psb_z_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..4ea1b385 --- /dev/null +++ b/gpu/impl/psb_z_mv_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_fmt +#else + use psb_z_hlg_mat_mod +#endif + implicit none + + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_hlg_from_fmt diff --git a/gpu/impl/psb_z_mv_hybg_from_coo.F90 b/gpu/impl/psb_z_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..3424caea --- /dev/null +++ b/gpu/impl/psb_z_mv_hybg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_mv_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_coo +#else + use psb_z_hybg_mat_mod +#endif + implicit none + + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hybg_from_coo +#endif diff --git a/gpu/impl/psb_z_mv_hybg_from_fmt.F90 b/gpu/impl/psb_z_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..90c35897 --- /dev/null +++ b/gpu/impl/psb_z_mv_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_fmt +#else + use psb_z_hybg_mat_mod +#endif + implicit none + + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select +end subroutine psb_z_mv_hybg_from_fmt +#endif diff --git a/gpu/ivectordev.c b/gpu/ivectordev.c new file mode 100644 index 00000000..93636465 --- /dev/null +++ b/gpu/ivectordev.c @@ -0,0 +1,182 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "ivectordev.h" + + +int registerMappedInt(void *buff, void **d_p, int n, int dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(int)); +} + +int writeMultiVecDeviceInt(void* deviceVec, int* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(int)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + + return(i); +} + +int writeMultiVecDeviceIntR2(void* deviceVec, int* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceInt(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceIntR2",i); + } + return(i); +} + +int readMultiVecDeviceInt(void* deviceVec, int* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(int)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceInt",i); + } + return(i); +} + +int readMultiVecDeviceIntR2(void* deviceVec, int* hostVec, int ld) +{ int i; + i = readMultiVecDeviceInt(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceIntR2",i); + } + return(i); +} + + +int setscalMultiVecDeviceInt(int val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + int beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0; + else if (dupl == INS_ADD) + beta = 1; + else + beta = 0; + + spgpuIscat(handle, (int *) devVecX->v_, n, (int *)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceInt(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + int *hv = (int *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuIgath(handle,hv, n, idx,indexBase, (int *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, int beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceInt(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, int beta) +{ int i=0; + int *hv = (int *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuIscat(handle, (int *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + +#endif + diff --git a/gpu/ivectordev.h b/gpu/ivectordev.h new file mode 100644 index 00000000..5f7ca974 --- /dev/null +++ b/gpu/ivectordev.h @@ -0,0 +1,64 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedInt(void *, void **, int, int); +int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); +int writeMultiVecDeviceIntR2(void* deviceMultiVec, int* hostMultiVec, int ld); +int readMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); +int readMultiVecDeviceIntR2(void* deviceMultiVec, int* hostMultiVec, int ld); + +int setscalMultiVecDeviceInt(int val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceInt(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, int beta); +int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, int beta); + +#endif diff --git a/gpu/psb_base_vectordev_mod.F90 b/gpu/psb_base_vectordev_mod.F90 new file mode 100644 index 00000000..f8c303d0 --- /dev/null +++ b/gpu/psb_base_vectordev_mod.F90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_base_vectordev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: multivec_dev_parms + integer(c_int) :: count + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: size + end type multivec_dev_parms + +#ifdef HAVE_SPGPU + + + interface + function FallocMultiVecDevice(deviceVec,count,Size,elementType) & + & result(res) bind(c,name='FallocMultiVecDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: count,Size,elementType + type(c_ptr) :: deviceVec + end function FallocMultiVecDevice + end interface + + + interface + subroutine unregisterMapped(buf) & + & bind(c,name='unregisterMapped') + use iso_c_binding + type(c_ptr), value :: buf + end subroutine unregisterMapped + end interface + + interface + subroutine freeMultiVecDevice(deviceVec) & + & bind(c,name='freeMultiVecDevice') + use iso_c_binding + type(c_ptr), value :: deviceVec + end subroutine freeMultiVecDevice + end interface + + interface + function getMultiVecDeviceSize(deviceVec) & + & bind(c,name='getMultiVecDeviceSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceVec + integer(c_int) :: res + end function getMultiVecDeviceSize + end interface + + interface + function getMultiVecDeviceCount(deviceVec) & + & bind(c,name='getMultiVecDeviceCount') result(res) + use iso_c_binding + type(c_ptr), value :: deviceVec + integer(c_int) :: res + end function getMultiVecDeviceCount + end interface + + interface + function getMultiVecDevicePitch(deviceVec) & + & bind(c,name='getMultiVecDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceVec + integer(c_int) :: res + end function getMultiVecDevicePitch + end interface + +#endif + + +end module psb_base_vectordev_mod diff --git a/gpu/psb_c_csrg_mat_mod.F90 b/gpu/psb_c_csrg_mat_mod.F90 new file mode 100644 index 00000000..203a6dbf --- /dev/null +++ b/gpu/psb_c_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_csrg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_csr_sparse_mat) :: psb_c_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(c_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_csrg_get_fmt + procedure, pass(a) :: sizeof => c_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_c_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_csrg_csmm + procedure, pass(a) :: csmv => psb_c_csrg_csmv + procedure, pass(a) :: scals => psb_c_csrg_scals + procedure, pass(a) :: scalv => psb_c_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_c_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_csrg_from_fmt + procedure, pass(a) :: free => c_csrg_free + procedure, pass(a) :: mold => psb_c_csrg_mold + procedure, pass(a) :: is_host => c_csrg_is_host + procedure, pass(a) :: is_dev => c_csrg_is_dev + procedure, pass(a) :: is_sync => c_csrg_is_sync + procedure, pass(a) :: set_host => c_csrg_set_host + procedure, pass(a) :: set_dev => c_csrg_set_dev + procedure, pass(a) :: set_sync => c_csrg_set_sync + procedure, pass(a) :: sync => c_csrg_sync + procedure, pass(a) :: to_gpu => psb_c_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_c_csrg_from_gpu + final :: c_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_csrg_mold +#endif + end type psb_c_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_csrg_get_nzeros, c_csrg_free, c_csrg_get_fmt, & + & c_csrg_get_size, c_csrg_sizeof, c_csrg_get_nz_row + + + interface + subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrg_vect_mv + end interface + + interface + subroutine psb_c_csrg_reallocate_nz(nz,a) + import :: psb_c_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_c_csrg_reallocate_nz + end interface + + interface + subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_c_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_csrg_allocate_mnnz + end interface + + interface + subroutine psb_c_csrg_mold(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrg_mold + end interface + + interface + subroutine psb_c_csrg_to_gpu(a,info, nzrm) + import :: psb_c_csrg_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_csrg_to_gpu + end interface + + interface + subroutine psb_c_csrg_from_gpu(a,info) + import :: psb_c_csrg_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrg_from_gpu + end interface + + interface + subroutine psb_c_cp_csrg_from_coo(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_csrg_from_coo + end interface + + interface + subroutine psb_c_cp_csrg_from_fmt(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_csrg_from_fmt + end interface + + interface + subroutine psb_c_mv_csrg_from_coo(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_csrg_from_coo + end interface + + interface + subroutine psb_c_mv_csrg_from_fmt(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_csrg_from_fmt + end interface + + interface + subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrg_csmv + end interface + interface + subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrg_csmm + end interface + + interface + subroutine psb_c_csrg_scal(d,a,info,side) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_csrg_scal + end interface + + interface + subroutine psb_c_csrg_scals(d,a,info) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_csrg_sizeof(a) result(res) + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_csrg_sizeof + + function c_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function c_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine c_csrg_set_host(a) + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_csrg_set_host + + subroutine c_csrg_set_dev(a) + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_csrg_set_dev + + subroutine c_csrg_set_sync(a) + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_csrg_set_sync + + function c_csrg_is_dev(a) result(res) + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_csrg_is_dev + + function c_csrg_is_host(a) result(res) + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_csrg_is_host + + function c_csrg_is_sync(a) result(res) + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_csrg_is_sync + + + subroutine c_csrg_sync(a) + implicit none + class(psb_c_csrg_sparse_mat), target, intent(in) :: a + class(psb_c_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_csrg_sync + + subroutine c_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_csrg_free + + subroutine c_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_c_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine c_csrg_finalize + +#else + interface + subroutine psb_c_csrg_mold(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrg_mold + end interface + +#endif + +end module psb_c_csrg_mat_mod diff --git a/gpu/psb_c_diag_mat_mod.F90 b/gpu/psb_c_diag_mat_mod.F90 new file mode 100644 index 00000000..a7ab2fbb --- /dev/null +++ b/gpu/psb_c_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_c_dia_mat_mod + + type, extends(psb_c_dia_sparse_mat) :: psb_c_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_diag_get_fmt + procedure, pass(a) :: sizeof => c_diag_sizeof + procedure, pass(a) :: vect_mv => psb_c_diag_vect_mv +! procedure, pass(a) :: csmm => psb_c_diag_csmm + procedure, pass(a) :: csmv => psb_c_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_c_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_c_diag_scals +! procedure, pass(a) :: scalv => psb_c_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_c_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_c_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_c_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_c_mv_diag_from_fmt + procedure, pass(a) :: free => c_diag_free + procedure, pass(a) :: mold => psb_c_diag_mold + procedure, pass(a) :: to_gpu => psb_c_diag_to_gpu + final :: c_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_diag_mold +#endif + end type psb_c_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_diag_get_nzeros, c_diag_free, c_diag_get_fmt, & + & c_diag_get_size, c_diag_sizeof, c_diag_get_nz_row + + + interface + subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_diag_vect_mv + end interface + + interface + subroutine psb_c_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_diag_inner_vect_sv + end interface + + interface + subroutine psb_c_diag_reallocate_nz(nz,a) + import :: psb_c_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_diag_sparse_mat), intent(inout) :: a + end subroutine psb_c_diag_reallocate_nz + end interface + + interface + subroutine psb_c_diag_allocate_mnnz(m,n,a,nz) + import :: psb_c_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_diag_allocate_mnnz + end interface + + interface + subroutine psb_c_diag_mold(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_diag_mold + end interface + + interface + subroutine psb_c_diag_to_gpu(a,info, nzrm) + import :: psb_c_diag_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_diag_to_gpu + end interface + + interface + subroutine psb_c_cp_diag_from_coo(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_diag_from_coo + end interface + + interface + subroutine psb_c_cp_diag_from_fmt(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_diag_from_fmt + end interface + + interface + subroutine psb_c_mv_diag_from_coo(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_diag_from_coo + end interface + + + interface + subroutine psb_c_mv_diag_from_fmt(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_diag_from_fmt + end interface + + interface + subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_diag_csmv + end interface + interface + subroutine psb_c_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_diag_csmm + end interface + + interface + subroutine psb_c_diag_scal(d,a,info, side) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_diag_scal + end interface + + interface + subroutine psb_c_diag_scals(d,a,info) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_diag_sizeof(a) result(res) + implicit none + class(psb_c_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_diag_sizeof + + function c_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function c_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_dia_sparse_mat%free() + + return + + end subroutine c_diag_free + + subroutine c_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_c_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_diag_finalize + +#else + + interface + subroutine psb_c_diag_mold(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_diag_mold + end interface + +#endif + +end module psb_c_diag_mat_mod diff --git a/gpu/psb_c_dnsg_mat_mod.F90 b/gpu/psb_c_dnsg_mat_mod.F90 new file mode 100644 index 00000000..7fe5fdda --- /dev/null +++ b/gpu/psb_c_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_dnsg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_dns_mat_mod + use dnsdev_mod + + type, extends(psb_c_dns_sparse_mat) :: psb_c_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => c_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_c_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_c_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_c_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_c_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_c_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_c_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_c_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_c_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_c_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_dnsg_from_fmt + procedure, pass(a) :: free => c_dnsg_free + procedure, pass(a) :: mold => psb_c_dnsg_mold + procedure, pass(a) :: to_gpu => psb_c_dnsg_to_gpu + final :: c_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_dnsg_mold +#endif + end type psb_c_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_dnsg_get_nzeros, c_dnsg_free, c_dnsg_get_fmt, & + & c_dnsg_get_size, c_dnsg_get_nz_row + + + interface + subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_c_dnsg_mold(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dnsg_mold + end interface + + interface + subroutine psb_c_dnsg_to_gpu(a,info) + import :: psb_c_dnsg_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dnsg_to_gpu + end interface + + interface + subroutine psb_c_cp_dnsg_from_coo(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dnsg_from_coo + end interface + + interface + subroutine psb_c_cp_dnsg_from_fmt(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_c_mv_dnsg_from_coo(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_c_mv_dnsg_from_fmt(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_c_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_c_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_scal(d,a,info, side) +!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_scals(d,a,info) +!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function c_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function c_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_dns_sparse_mat%free() + + return + + end subroutine c_dnsg_free + + subroutine c_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_c_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_dnsg_finalize + +#else + + interface + subroutine psb_c_dnsg_mold(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dnsg_mold + end interface + +#endif + +end module psb_c_dnsg_mat_mod diff --git a/gpu/psb_c_elg_mat_mod.F90 b/gpu/psb_c_elg_mat_mod.F90 new file mode 100644 index 00000000..83355b9d --- /dev/null +++ b/gpu/psb_c_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_elg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_ell_sparse_mat) :: psb_c_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_elg_get_fmt + procedure, pass(a) :: sizeof => c_elg_sizeof + procedure, pass(a) :: vect_mv => psb_c_elg_vect_mv + procedure, pass(a) :: csmm => psb_c_elg_csmm + procedure, pass(a) :: csmv => psb_c_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_elg_scals + procedure, pass(a) :: scalv => psb_c_elg_scal + procedure, pass(a) :: reallocate_nz => psb_c_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_elg_allocate_mnnz + procedure, pass(a) :: reinit => c_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_elg_from_fmt + procedure, pass(a) :: free => c_elg_free + procedure, pass(a) :: mold => psb_c_elg_mold + procedure, pass(a) :: csput_a => psb_c_elg_csput_a + procedure, pass(a) :: csput_v => psb_c_elg_csput_v + procedure, pass(a) :: is_host => c_elg_is_host + procedure, pass(a) :: is_dev => c_elg_is_dev + procedure, pass(a) :: is_sync => c_elg_is_sync + procedure, pass(a) :: set_host => c_elg_set_host + procedure, pass(a) :: set_dev => c_elg_set_dev + procedure, pass(a) :: set_sync => c_elg_set_sync + procedure, pass(a) :: sync => c_elg_sync + procedure, pass(a) :: from_gpu => psb_c_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_elg_to_gpu + procedure, pass(a) :: asb => psb_c_elg_asb + final :: c_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_elg_mold + procedure, pass(a) :: asb => psb_c_elg_asb +#endif + end type psb_c_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_elg_get_nzeros, c_elg_free, c_elg_get_fmt, & + & c_elg_get_size, c_elg_sizeof, c_elg_get_nz_row, c_elg_sync + + + interface + subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_elg_vect_mv + end interface + + interface + subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_elg_inner_vect_sv + end interface + + interface + subroutine psb_c_elg_reallocate_nz(nz,a) + import :: psb_c_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_elg_reallocate_nz + end interface + + interface + subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) + import :: psb_c_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_elg_allocate_mnnz + end interface + + interface + subroutine psb_c_elg_mold(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_mold + end interface + + interface + subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_csput_a + end interface + + interface + subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& + & psb_i_base_vect_type + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_csput_v + end interface + + interface + subroutine psb_c_elg_from_gpu(a,info) + import :: psb_c_elg_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_from_gpu + end interface + + interface + subroutine psb_c_elg_to_gpu(a,info, nzrm) + import :: psb_c_elg_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_elg_to_gpu + end interface + + interface + subroutine psb_c_cp_elg_from_coo(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_elg_from_coo + end interface + + interface + subroutine psb_c_cp_elg_from_fmt(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_elg_from_fmt + end interface + + interface + subroutine psb_c_mv_elg_from_coo(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_elg_from_coo + end interface + + + interface + subroutine psb_c_mv_elg_from_fmt(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_elg_from_fmt + end interface + + interface + subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_elg_csmv + end interface + interface + subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_elg_csmm + end interface + + interface + subroutine psb_c_elg_scal(d,a,info, side) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_elg_scal + end interface + + interface + subroutine psb_c_elg_scals(d,a,info) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_scals + end interface + + interface + subroutine psb_c_elg_asb(a) + import :: psb_c_elg_sparse_mat + class(psb_c_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_elg_sizeof(a) result(res) + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_elg_sizeof + + function c_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function c_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine c_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = czero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine c_elg_reinit + + subroutine c_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine c_elg_free + + subroutine c_elg_sync(a) + implicit none + class(psb_c_elg_sparse_mat), target, intent(in) :: a + class(psb_c_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_elg_sync + + subroutine c_elg_set_host(a) + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_elg_set_host + + subroutine c_elg_set_dev(a) + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_elg_set_dev + + subroutine c_elg_set_sync(a) + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_elg_set_sync + + function c_elg_is_dev(a) result(res) + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_elg_is_dev + + function c_elg_is_host(a) result(res) + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_elg_is_host + + function c_elg_is_sync(a) result(res) + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_elg_is_sync + + subroutine c_elg_finalize(a) + use elldev_mod + implicit none + type(psb_c_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine c_elg_finalize + +#else + + interface + subroutine psb_c_elg_asb(a) + import :: psb_c_elg_sparse_mat + class(psb_c_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_elg_asb + end interface + + interface + subroutine psb_c_elg_mold(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_mold + end interface + +#endif + +end module psb_c_elg_mat_mod diff --git a/gpu/psb_c_gpu_vect_mod.F90 b/gpu/psb_c_gpu_vect_mod.F90 new file mode 100644 index 00000000..4c31154f --- /dev/null +++ b/gpu/psb_c_gpu_vect_mod.F90 @@ -0,0 +1,1989 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_c_vect_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_gpu_vect_mod + use psb_i_vectordev_mod + use psb_c_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_vect_type) :: psb_c_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + complex(c_float_complex), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + complex(c_float_complex), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => c_gpu_get_nrows + procedure, nopass :: get_fmt => c_gpu_get_fmt + + procedure, pass(x) :: all => c_gpu_all + procedure, pass(x) :: zero => c_gpu_zero + procedure, pass(x) :: asb_m => c_gpu_asb_m + procedure, pass(x) :: sync => c_gpu_sync + procedure, pass(x) :: sync_space => c_gpu_sync_space + procedure, pass(x) :: bld_x => c_gpu_bld_x + procedure, pass(x) :: bld_mn => c_gpu_bld_mn + procedure, pass(x) :: free => c_gpu_free + procedure, pass(x) :: ins_a => c_gpu_ins_a + procedure, pass(x) :: ins_v => c_gpu_ins_v + procedure, pass(x) :: is_host => c_gpu_is_host + procedure, pass(x) :: is_dev => c_gpu_is_dev + procedure, pass(x) :: is_sync => c_gpu_is_sync + procedure, pass(x) :: set_host => c_gpu_set_host + procedure, pass(x) :: set_dev => c_gpu_set_dev + procedure, pass(x) :: set_sync => c_gpu_set_sync + procedure, pass(x) :: set_scal => c_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => c_gpu_set_vect + procedure, pass(x) :: gthzv_x => c_gpu_gthzv_x + procedure, pass(y) :: sctb => c_gpu_sctb + procedure, pass(y) :: sctb_x => c_gpu_sctb_x + procedure, pass(x) :: gthzbuf => c_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => c_gpu_sctb_buf + procedure, pass(x) :: new_buffer => c_gpu_new_buffer + procedure, nopass :: device_wait => c_gpu_device_wait + procedure, pass(x) :: free_buffer => c_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => c_gpu_maybe_free_buffer + procedure, pass(x) :: dot_v => c_gpu_dot_v + procedure, pass(x) :: dot_a => c_gpu_dot_a + procedure, pass(y) :: axpby_v => c_gpu_axpby_v + procedure, pass(y) :: axpby_a => c_gpu_axpby_a + procedure, pass(y) :: mlt_v => c_gpu_mlt_v + procedure, pass(y) :: mlt_a => c_gpu_mlt_a + procedure, pass(z) :: mlt_a_2 => c_gpu_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => c_gpu_mlt_v_2 + procedure, pass(x) :: scal => c_gpu_scal + procedure, pass(x) :: nrm2 => c_gpu_nrm2 + procedure, pass(x) :: amax => c_gpu_amax + procedure, pass(x) :: asum => c_gpu_asum + procedure, pass(x) :: absval1 => c_gpu_absval1 + procedure, pass(x) :: absval2 => c_gpu_absval2 + + final :: c_gpu_vect_finalize +#endif + end type psb_c_vect_gpu + + public :: psb_c_vect_gpu_ + private :: constructor + interface psb_c_vect_gpu_ + module procedure constructor + end interface psb_c_vect_gpu_ + +contains + + function constructor(x) result(this) + complex(psb_spk_) :: x(:) + type(psb_c_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine c_gpu_device_wait() + call psb_cudaSync() + end subroutine c_gpu_device_wait + + subroutine c_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloatComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloatComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceFloatComplex(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(x%dt_buf,y,n) + + end select + + end subroutine c_gpu_gthzv_x + + subroutine c_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_c_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceFloatComplex(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readFloatComplex(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine c_gpu_gthzbuf + + subroutine c_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + complex(psb_spk_) :: beta, x(:) + class(psb_c_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_c_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine c_gpu_sctb + + subroutine c_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_spk_) :: beta, x(:) + class(psb_c_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloatComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeFloatComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloatComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeFloatComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloatComplex(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine c_gpu_sctb_x + + subroutine c_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_spk_) :: beta + class(psb_c_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeFloatComplex(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeFloatComplex(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceFloatComplex(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine c_gpu_sctb_buf + + + subroutine c_gpu_bld_x(x,this) + use psb_base_mod + complex(psb_spk_), intent(in) :: this(:) + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine c_gpu_bld_x + + subroutine c_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'c_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine c_gpu_bld_mn + + subroutine c_gpu_set_host(x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine c_gpu_set_host + + subroutine c_gpu_set_dev(x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine c_gpu_set_dev + + subroutine c_gpu_set_sync(x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine c_gpu_set_sync + + function c_gpu_is_dev(x) result(res) + implicit none + class(psb_c_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function c_gpu_is_dev + + function c_gpu_is_host(x) result(res) + implicit none + class(psb_c_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function c_gpu_is_host + + function c_gpu_is_sync(x) result(res) + implicit none + class(psb_c_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function c_gpu_is_sync + + + function c_gpu_get_nrows(x) result(res) + implicit none + class(psb_c_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function c_gpu_get_nrows + + function c_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'cGPU' + end function c_gpu_get_fmt + + subroutine c_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_c_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine c_gpu_all + + subroutine c_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=czero + call x%set_host() + end subroutine c_gpu_zero + + subroutine c_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_c_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'c_gpu_sync') + end if + + end subroutine c_gpu_sync + + subroutine c_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine c_gpu_free + + subroutine c_gpu_set_scal(x,val,first,last) + class(psb_c_vect_gpu), intent(inout) :: x + complex(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine c_gpu_set_scal +!!$ +!!$ subroutine c_gpu_set_vect(x,val) +!!$ class(psb_c_vect_gpu), intent(inout) :: x +!!$ complex(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_c_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine c_gpu_set_vect + + + + function c_gpu_dot_v(n,x,y) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_) :: res + complex(psb_spk_), external :: ddot + integer(psb_ipk_) :: info + + res = czero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_c_vect + ! + select type(yy => y) + type is (psb_c_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_c_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'c_gpu_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function c_gpu_dot_v + + function c_gpu_dot_a(n,x,y) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + complex(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_) :: res + complex(psb_spk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function c_gpu_dot_a + + subroutine c_gpu_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_vect_gpu), intent(inout) :: y + complex(psb_spk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_c_vect_gpu) + ! Do something different here + if ((beta /= czero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + type is (psb_c_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_c_vect_gpu) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,cone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine c_gpu_mlt_v + + subroutine c_gpu_mlt_a(x, y, info) + use psi_serial_mod + implicit none + complex(psb_spk_), intent(in) :: x(:) + class(psb_c_vect_gpu), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_c_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine c_gpu_mlt_a + + subroutine c_gpu_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + complex(psb_spk_), intent(in) :: alpha,beta + complex(psb_spk_), intent(in) :: x(:) + complex(psb_spk_), intent(in) :: y(:) + class(psb_c_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine c_gpu_mlt_a_2 + + subroutine c_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + complex(psb_spk_), intent(in) :: alpha,beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_c_vect_gpu) + select type (yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= czero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= czero).and.(z%is_dev())) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= czero).and.(z%is_dev())) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine c_gpu_mlt_v_2 + + subroutine c_gpu_scal(alpha, x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + complex(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine c_gpu_scal + + + function c_gpu_nrm2(n,x) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) + + end function c_gpu_nrm2 + + function c_gpu_amax(n,x) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) + + end function c_gpu_amax + + function c_gpu_asum(n,x) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDeviceComplex(res,n,x%deviceVect) + + end function c_gpu_asum + + subroutine c_gpu_absval1(x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,cone,x%deviceVect) + + end subroutine c_gpu_absval1 + + subroutine c_gpu_absval2(x,y) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_c_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,cone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_c_base_vect_type%absval(y) + end select + end subroutine c_gpu_absval2 + + + subroutine c_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine c_gpu_vect_finalize + + subroutine c_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_c_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_c_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceFloatComplex(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine c_gpu_ins_v + + subroutine c_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_c_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine c_gpu_ins_a + +#endif + +end module psb_c_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_c_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_c_multivect_mod + use psb_c_base_multivect_mod + + use psb_i_multivect_mod +#ifdef HAVE_SPGPU + use psb_i_gpu_multivect_mod + use psb_c_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_multivect_type) :: psb_c_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => c_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => c_gpu_multi_get_ncols + procedure, nopass :: get_fmt => c_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => c_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => c_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => c_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => c_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => c_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => c_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => c_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => c_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => c_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => c_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => c_gpu_multi_amax +!!$ procedure, pass(x) :: asum => c_gpu_multi_asum + procedure, pass(x) :: all => c_gpu_multi_all + procedure, pass(x) :: zero => c_gpu_multi_zero + procedure, pass(x) :: asb => c_gpu_multi_asb + procedure, pass(x) :: sync => c_gpu_multi_sync + procedure, pass(x) :: sync_space => c_gpu_multi_sync_space + procedure, pass(x) :: bld_x => c_gpu_multi_bld_x + procedure, pass(x) :: bld_n => c_gpu_multi_bld_n + procedure, pass(x) :: free => c_gpu_multi_free + procedure, pass(x) :: ins => c_gpu_multi_ins + procedure, pass(x) :: is_host => c_gpu_multi_is_host + procedure, pass(x) :: is_dev => c_gpu_multi_is_dev + procedure, pass(x) :: is_sync => c_gpu_multi_is_sync + procedure, pass(x) :: set_host => c_gpu_multi_set_host + procedure, pass(x) :: set_dev => c_gpu_multi_set_dev + procedure, pass(x) :: set_sync => c_gpu_multi_set_sync + procedure, pass(x) :: set_scal => c_gpu_multi_set_scal + procedure, pass(x) :: set_vect => c_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => c_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => c_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => c_gpu_multi_sctb_x + final :: c_gpu_multi_vect_finalize +#endif + end type psb_c_multivect_gpu + + public :: psb_c_multivect_gpu + private :: constructor + interface psb_c_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + complex(psb_spk_) :: x(:,:) + type(psb_c_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine c_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_spk_) :: y(:) +!!$ class(psb_c_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine c_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine c_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ complex(psb_spk_) :: beta, x(:) +!!$ class(psb_c_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_c_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine c_gpu_multi_sctb +!!$ +!!$ subroutine c_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_spk_) :: beta, x(:) +!!$ class(psb_c_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine c_gpu_multi_sctb_x + + + subroutine c_gpu_multi_bld_x(x,this) + use psb_base_mod + complex(psb_spk_), intent(in) :: this(:,:) + class(psb_c_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine c_gpu_multi_bld_x + + subroutine c_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'c_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine c_gpu_multi_bld_n + + + subroutine c_gpu_multi_set_host(x) + implicit none + class(psb_c_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine c_gpu_multi_set_host + + subroutine c_gpu_multi_set_dev(x) + implicit none + class(psb_c_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine c_gpu_multi_set_dev + + subroutine c_gpu_multi_set_sync(x) + implicit none + class(psb_c_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine c_gpu_multi_set_sync + + function c_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function c_gpu_multi_is_dev + + function c_gpu_multi_is_host(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function c_gpu_multi_is_host + + function c_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function c_gpu_multi_is_sync + + + function c_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function c_gpu_multi_get_nrows + + function c_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function c_gpu_multi_get_ncols + + function c_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'cGPU' + end function c_gpu_multi_get_fmt + +!!$ function c_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_spk_) :: res +!!$ complex(psb_spk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_c_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_c_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_c_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'c_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function c_gpu_multi_dot_v +!!$ +!!$ function c_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ complex(psb_spk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_spk_) :: res +!!$ complex(psb_spk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function c_gpu_multi_dot_a +!!$ +!!$ subroutine c_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_c_base_multivect_type), intent(inout) :: x +!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ complex(psb_spk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_c_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_c_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_c_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_c_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine c_gpu_multi_mlt_v +!!$ +!!$ subroutine c_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: x(:) +!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_c_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine c_gpu_multi_mlt_a +!!$ +!!$ subroutine c_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: alpha,beta +!!$ complex(psb_spk_), intent(in) :: x(:) +!!$ complex(psb_spk_), intent(in) :: y(:) +!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine c_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine c_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: alpha,beta +!!$ class(psb_c_base_multivect_type), intent(inout) :: x +!!$ class(psb_c_base_multivect_type), intent(inout) :: y +!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_c_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_c_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine c_gpu_multi_mlt_v_2 + + + subroutine c_gpu_multi_set_scal(x,val) + class(psb_c_multivect_gpu), intent(inout) :: x + complex(psb_spk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_c_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine c_gpu_multi_set_scal + + subroutine c_gpu_multi_set_vect(x,val) + class(psb_c_multivect_gpu), intent(inout) :: x + complex(psb_spk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_c_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine c_gpu_multi_set_vect + + + +!!$ subroutine c_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ complex(psb_spk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_c_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine c_gpu_multi_scal +!!$ +!!$ +!!$ function c_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function c_gpu_multi_nrm2 +!!$ +!!$ function c_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function c_gpu_multi_amax +!!$ +!!$ function c_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function c_gpu_multi_asum + + subroutine c_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=czero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine c_gpu_multi_all + + subroutine c_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_c_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine c_gpu_multi_zero + + subroutine c_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_c_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc c_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => c_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_c_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_c_hdiag_csmm + procedure, pass(a) :: csmv => psb_c_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_c_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_c_hdiag_scals + ! procedure, pass(a) :: scalv => psb_c_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_c_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_c_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_c_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_hdiag_from_fmt + procedure, pass(a) :: free => c_hdiag_free + procedure, pass(a) :: mold => psb_c_hdiag_mold + procedure, pass(a) :: to_gpu => psb_c_hdiag_to_gpu + final :: c_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_hdiag_mold +#endif + end type psb_c_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_hdiag_get_nzeros, c_hdiag_free, c_hdiag_get_fmt, & + & c_hdiag_get_size, c_hdiag_sizeof, c_hdiag_get_nz_row + + + interface + subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_c_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_c_hdiag_mold(a,b,info) + import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdiag_mold + end interface + + interface + subroutine psb_c_hdiag_to_gpu(a,info) + import :: psb_c_hdiag_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdiag_to_gpu + end interface + + interface + subroutine psb_c_cp_hdiag_from_coo(a,b,info) + import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_c_mv_hdiag_from_coo(a,b,info) + import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_c_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_c_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdiag_scal(d,a,info, side) +!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdiag_scals(d,a,info) +!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function c_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function c_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hdia_sparse_mat%free() + + return + + end subroutine c_hdiag_free + + subroutine c_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_c_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hdia_sparse_mat%free() + + return + end subroutine c_hdiag_finalize + +#else + + interface + subroutine psb_c_hdiag_mold(a,b,info) + import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdiag_mold + end interface + +#endif + +end module psb_c_hdiag_mat_mod diff --git a/gpu/psb_c_hlg_mat_mod.F90 b/gpu/psb_c_hlg_mat_mod.F90 new file mode 100644 index 00000000..9236a202 --- /dev/null +++ b/gpu/psb_c_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_hlg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_hll_sparse_mat) :: psb_c_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_hlg_get_fmt + procedure, pass(a) :: sizeof => c_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_c_hlg_vect_mv + procedure, pass(a) :: csmm => psb_c_hlg_csmm + procedure, pass(a) :: csmv => psb_c_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_hlg_scals + procedure, pass(a) :: scalv => psb_c_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_c_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_hlg_from_fmt + procedure, pass(a) :: free => c_hlg_free + procedure, pass(a) :: mold => psb_c_hlg_mold + procedure, pass(a) :: is_host => c_hlg_is_host + procedure, pass(a) :: is_dev => c_hlg_is_dev + procedure, pass(a) :: is_sync => c_hlg_is_sync + procedure, pass(a) :: set_host => c_hlg_set_host + procedure, pass(a) :: set_dev => c_hlg_set_dev + procedure, pass(a) :: set_sync => c_hlg_set_sync + procedure, pass(a) :: sync => c_hlg_sync + procedure, pass(a) :: from_gpu => psb_c_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_hlg_to_gpu + final :: c_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_hlg_mold +#endif + end type psb_c_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_hlg_get_nzeros, c_hlg_free, c_hlg_get_fmt, & + & c_hlg_get_size, c_hlg_sizeof, c_hlg_get_nz_row + + + interface + subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hlg_vect_mv + end interface + + interface + subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hlg_inner_vect_sv + end interface + + interface + subroutine psb_c_hlg_reallocate_nz(nz,a) + import :: psb_c_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_c_hlg_reallocate_nz + end interface + + interface + subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_c_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hlg_allocate_mnnz + end interface + + interface + subroutine psb_c_hlg_mold(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hlg_mold + end interface + + interface + subroutine psb_c_hlg_from_gpu(a,info) + import :: psb_c_hlg_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hlg_from_gpu + end interface + + interface + subroutine psb_c_hlg_to_gpu(a,info, nzrm) + import :: psb_c_hlg_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_hlg_to_gpu + end interface + + interface + subroutine psb_c_cp_hlg_from_coo(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hlg_from_coo + end interface + + interface + subroutine psb_c_cp_hlg_from_fmt(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hlg_from_fmt + end interface + + interface + subroutine psb_c_mv_hlg_from_coo(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hlg_from_coo + end interface + + + interface + subroutine psb_c_mv_hlg_from_fmt(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hlg_from_fmt + end interface + + interface + subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hlg_csmv + end interface + interface + subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hlg_csmm + end interface + + interface + subroutine psb_c_hlg_scal(d,a,info, side) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_hlg_scal + end interface + + interface + subroutine psb_c_hlg_scals(d,a,info) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hlg_sizeof(a) result(res) + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_hlg_sizeof + + function c_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function c_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hll_sparse_mat%free() + + return + + end subroutine c_hlg_free + + + subroutine c_hlg_sync(a) + implicit none + class(psb_c_hlg_sparse_mat), target, intent(in) :: a + class(psb_c_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_hlg_sync + + subroutine c_hlg_set_host(a) + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_hlg_set_host + + subroutine c_hlg_set_dev(a) + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_hlg_set_dev + + subroutine c_hlg_set_sync(a) + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_hlg_set_sync + + function c_hlg_is_dev(a) result(res) + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_hlg_is_dev + + function c_hlg_is_host(a) result(res) + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_hlg_is_host + + function c_hlg_is_sync(a) result(res) + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_hlg_is_sync + + + subroutine c_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_c_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_hlg_finalize + +#else + + interface + subroutine psb_c_hlg_mold(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hlg_mold + end interface + +#endif + +end module psb_c_hlg_mat_mod diff --git a/gpu/psb_c_hybg_mat_mod.F90 b/gpu/psb_c_hybg_mat_mod.F90 new file mode 100644 index 00000000..d5c605ec --- /dev/null +++ b/gpu/psb_c_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_c_hybg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + type, extends(psb_c_csr_sparse_mat) :: psb_c_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(c_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => c_hybg_get_fmt + procedure, pass(a) :: sizeof => c_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_c_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_hybg_csmm + procedure, pass(a) :: csmv => psb_c_hybg_csmv + procedure, pass(a) :: scals => psb_c_hybg_scals + procedure, pass(a) :: scalv => psb_c_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_c_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_hybg_from_fmt + procedure, pass(a) :: free => c_hybg_free + procedure, pass(a) :: mold => psb_c_hybg_mold + procedure, pass(a) :: to_gpu => psb_c_hybg_to_gpu + final :: c_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_hybg_mold +#endif + end type psb_c_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_hybg_get_nzeros, c_hybg_free, c_hybg_get_fmt, & + & c_hybg_get_size, c_hybg_sizeof, c_hybg_get_nz_row + + + interface + subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hybg_inner_vect_sv + end interface + + interface + subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hybg_vect_mv + end interface + + interface + subroutine psb_c_hybg_reallocate_nz(nz,a) + import :: psb_c_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_c_hybg_reallocate_nz + end interface + + interface + subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_c_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hybg_allocate_mnnz + end interface + + interface + subroutine psb_c_hybg_mold(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hybg_mold + end interface + + interface + subroutine psb_c_hybg_to_gpu(a,info, nzrm) + import :: psb_c_hybg_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_hybg_to_gpu + end interface + + interface + subroutine psb_c_cp_hybg_from_coo(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hybg_from_coo + end interface + + interface + subroutine psb_c_cp_hybg_from_fmt(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hybg_from_fmt + end interface + + interface + subroutine psb_c_mv_hybg_from_coo(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hybg_from_coo + end interface + + interface + subroutine psb_c_mv_hybg_from_fmt(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hybg_from_fmt + end interface + + interface + subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hybg_csmv + end interface + interface + subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hybg_csmm + end interface + + interface + subroutine psb_c_hybg_scal(d,a,info,side) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_hybg_scal + end interface + + interface + subroutine psb_c_hybg_scals(d,a,info) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hybg_sizeof(a) result(res) + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_hybg_sizeof + + function c_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function c_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_hybg_free + + subroutine c_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_c_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine c_hybg_finalize + +#else + + interface + subroutine psb_c_hybg_mold(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hybg_mold + end interface + +#endif + +end module psb_c_hybg_mat_mod +#endif diff --git a/gpu/psb_c_vectordev_mod.F90 b/gpu/psb_c_vectordev_mod.F90 new file mode 100644 index 00000000..f3c243a6 --- /dev/null +++ b/gpu/psb_c_vectordev_mod.F90 @@ -0,0 +1,390 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedFloatComplex(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + complex(c_float_complex), value :: dummy + end function registerMappedFloatComplex + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceFloatComplex(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + complex(c_float_complex) :: hostVec(*) + end function writeMultiVecDeviceFloatComplex + function writeMultiVecDeviceFloatComplexR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceFloatComplexR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + complex(c_float_complex) :: hostVec(ld,*) + end function writeMultiVecDeviceFloatComplexR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceFloatComplex(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + complex(c_float_complex) :: hostVec(*) + end function readMultiVecDeviceFloatComplex + function readMultiVecDeviceFloatComplexR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceFloatComplexR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + complex(c_float_complex) :: hostVec(ld,*) + end function readMultiVecDeviceFloatComplexR2 + end interface + + interface allocateFloatComplex + function allocateFloatComplex(didx,n) & + & result(res) bind(c,name='allocateFloatComplex') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateFloatComplex + function allocateMultiFloatComplex(didx,m,n) & + & result(res) bind(c,name='allocateMultiFloatComplex') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiFloatComplex + end interface + + interface writeFloatComplex + function writeFloatComplex(didx,hidx,n) & + & result(res) bind(c,name='writeFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(*) + integer(c_int),value :: n + end function writeFloatComplex + function writeFloatComplexFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeFloatComplexFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeFloatComplexFirst + function writeMultiFloatComplex(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiFloatComplex + end interface + + interface readFloatComplex + function readFloatComplex(didx,hidx,n) & + & result(res) bind(c,name='readFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(*) + integer(c_int),value :: n + end function readFloatComplex + function readFloatComplexFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readFloatComplexFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readFloatComplexFirst + function readMultiFloatComplex(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiFloatComplex + end interface + + interface + subroutine freeFloatComplex(didx) & + & bind(c,name='freeFloatComplex') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeFloatComplex + end interface + + + interface setScalDevice + function setScalMultiVecDeviceFloatComplex(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + complex(c_float_complex), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceFloatComplex + end interface + + interface + function geinsMultiVecDeviceFloatComplex(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceFloatComplex + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceFloatComplex(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceFloatComplex + end interface + + interface + function igathMultiVecDeviceFloatComplexVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceFloatComplexVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceFloatComplexVecIdx + end interface + + interface + function iscatMultiVecDeviceFloatComplex(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + complex(c_float_complex),value :: beta + end function iscatMultiVecDeviceFloatComplex + end interface + + interface + function iscatMultiVecDeviceFloatComplexVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceFloatComplexVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + complex(c_float_complex),value :: beta + end function iscatMultiVecDeviceFloatComplexVecIdx + end interface + + + interface scalMultiVecDevice + function scalMultiVecDeviceFloatComplex(alpha,deviceVecA) & + & result(val) bind(c,name='scalMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + complex(c_float_complex), value :: alpha + type(c_ptr), value :: deviceVecA + end function scalMultiVecDeviceFloatComplex + end interface + + interface dotMultiVecDevice + function dotMultiVecDeviceFloatComplex(res, n,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + complex(c_float_complex) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotMultiVecDeviceFloatComplex + end interface + + interface nrm2MultiVecDeviceComplex + function nrm2MultiVecDeviceFloatComplex(res,n,deviceVecA) & + & result(val) bind(c,name='nrm2MultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function nrm2MultiVecDeviceFloatComplex + end interface + + interface amaxMultiVecDeviceComplex + function amaxMultiVecDeviceFloatComplex(res,n,deviceVecA) & + & result(val) bind(c,name='amaxMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function amaxMultiVecDeviceFloatComplex + end interface + + interface asumMultiVecDeviceComplex + function asumMultiVecDeviceFloatComplex(res,n,deviceVecA) & + & result(val) bind(c,name='asumMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function asumMultiVecDeviceFloatComplex + end interface + + + interface axpbyMultiVecDevice + function axpbyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,beta,deviceVecB) & + & result(res) bind(c,name='axpbyMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyMultiVecDeviceFloatComplex + end interface + + interface axyMultiVecDevice + function axyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='axyMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function axyMultiVecDeviceFloatComplex + end interface + + interface axybzMultiVecDevice + function axybzMultiVecDeviceFloatComplex(n,alpha,deviceVecA,deviceVecB,beta,deviceVecZ) & + & result(res) bind(c,name='axybzMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB,deviceVecZ + end function axybzMultiVecDeviceFloatComplex + end interface + + + interface absMultiVecDevice + function absMultiVecDeviceFloatComplex(n,alpha,deviceVecA) & + & result(res) bind(c,name='absMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha + type(c_ptr), value :: deviceVecA + end function absMultiVecDeviceFloatComplex + function absMultiVecDeviceFloatComplex2(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='absMultiVecDeviceFloatComplex2') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function absMultiVecDeviceFloatComplex2 + end interface + + interface inner_register + module procedure inner_registerFloatComplex + end interface + + interface inner_unregister + module procedure inner_unregisterFloatComplex + end interface + +contains + + + function inner_registerFloatComplex(buffer,dval) result(res) + complex(c_float_complex), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + complex(c_float_complex) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerFloatComplex + + subroutine inner_unregisterFloatComplex(buffer) + complex(c_float_complex), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterFloatComplex + +#endif + +end module psb_c_vectordev_mod diff --git a/gpu/psb_d_csrg_mat_mod.F90 b/gpu/psb_d_csrg_mat_mod.F90 new file mode 100644 index 00000000..177c7440 --- /dev/null +++ b/gpu/psb_d_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_csrg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_csr_sparse_mat) :: psb_d_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(d_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_csrg_get_fmt + procedure, pass(a) :: sizeof => d_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_d_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_csrg_csmm + procedure, pass(a) :: csmv => psb_d_csrg_csmv + procedure, pass(a) :: scals => psb_d_csrg_scals + procedure, pass(a) :: scalv => psb_d_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_d_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_csrg_from_fmt + procedure, pass(a) :: free => d_csrg_free + procedure, pass(a) :: mold => psb_d_csrg_mold + procedure, pass(a) :: is_host => d_csrg_is_host + procedure, pass(a) :: is_dev => d_csrg_is_dev + procedure, pass(a) :: is_sync => d_csrg_is_sync + procedure, pass(a) :: set_host => d_csrg_set_host + procedure, pass(a) :: set_dev => d_csrg_set_dev + procedure, pass(a) :: set_sync => d_csrg_set_sync + procedure, pass(a) :: sync => d_csrg_sync + procedure, pass(a) :: to_gpu => psb_d_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_d_csrg_from_gpu + final :: d_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_csrg_mold +#endif + end type psb_d_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_csrg_get_nzeros, d_csrg_free, d_csrg_get_fmt, & + & d_csrg_get_size, d_csrg_sizeof, d_csrg_get_nz_row + + + interface + subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrg_vect_mv + end interface + + interface + subroutine psb_d_csrg_reallocate_nz(nz,a) + import :: psb_d_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_d_csrg_reallocate_nz + end interface + + interface + subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_d_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_csrg_allocate_mnnz + end interface + + interface + subroutine psb_d_csrg_mold(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrg_mold + end interface + + interface + subroutine psb_d_csrg_to_gpu(a,info, nzrm) + import :: psb_d_csrg_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_csrg_to_gpu + end interface + + interface + subroutine psb_d_csrg_from_gpu(a,info) + import :: psb_d_csrg_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrg_from_gpu + end interface + + interface + subroutine psb_d_cp_csrg_from_coo(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_csrg_from_coo + end interface + + interface + subroutine psb_d_cp_csrg_from_fmt(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_csrg_from_fmt + end interface + + interface + subroutine psb_d_mv_csrg_from_coo(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_csrg_from_coo + end interface + + interface + subroutine psb_d_mv_csrg_from_fmt(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_csrg_from_fmt + end interface + + interface + subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrg_csmv + end interface + interface + subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrg_csmm + end interface + + interface + subroutine psb_d_csrg_scal(d,a,info,side) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_csrg_scal + end interface + + interface + subroutine psb_d_csrg_scals(d,a,info) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_csrg_sizeof(a) result(res) + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_csrg_sizeof + + function d_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function d_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine d_csrg_set_host(a) + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_csrg_set_host + + subroutine d_csrg_set_dev(a) + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_csrg_set_dev + + subroutine d_csrg_set_sync(a) + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_csrg_set_sync + + function d_csrg_is_dev(a) result(res) + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_csrg_is_dev + + function d_csrg_is_host(a) result(res) + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_csrg_is_host + + function d_csrg_is_sync(a) result(res) + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_csrg_is_sync + + + subroutine d_csrg_sync(a) + implicit none + class(psb_d_csrg_sparse_mat), target, intent(in) :: a + class(psb_d_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_csrg_sync + + subroutine d_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_csrg_free + + subroutine d_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_d_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine d_csrg_finalize + +#else + interface + subroutine psb_d_csrg_mold(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrg_mold + end interface + +#endif + +end module psb_d_csrg_mat_mod diff --git a/gpu/psb_d_diag_mat_mod.F90 b/gpu/psb_d_diag_mat_mod.F90 new file mode 100644 index 00000000..564f7a13 --- /dev/null +++ b/gpu/psb_d_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_d_dia_mat_mod + + type, extends(psb_d_dia_sparse_mat) :: psb_d_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => d_diag_get_fmt + procedure, pass(a) :: sizeof => d_diag_sizeof + procedure, pass(a) :: vect_mv => psb_d_diag_vect_mv +! procedure, pass(a) :: csmm => psb_d_diag_csmm + procedure, pass(a) :: csmv => psb_d_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_d_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_d_diag_scals +! procedure, pass(a) :: scalv => psb_d_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_d_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_d_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_d_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_d_mv_diag_from_fmt + procedure, pass(a) :: free => d_diag_free + procedure, pass(a) :: mold => psb_d_diag_mold + procedure, pass(a) :: to_gpu => psb_d_diag_to_gpu + final :: d_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_diag_mold +#endif + end type psb_d_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_diag_get_nzeros, d_diag_free, d_diag_get_fmt, & + & d_diag_get_size, d_diag_sizeof, d_diag_get_nz_row + + + interface + subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_diag_vect_mv + end interface + + interface + subroutine psb_d_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_diag_inner_vect_sv + end interface + + interface + subroutine psb_d_diag_reallocate_nz(nz,a) + import :: psb_d_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_diag_sparse_mat), intent(inout) :: a + end subroutine psb_d_diag_reallocate_nz + end interface + + interface + subroutine psb_d_diag_allocate_mnnz(m,n,a,nz) + import :: psb_d_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_diag_allocate_mnnz + end interface + + interface + subroutine psb_d_diag_mold(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_diag_mold + end interface + + interface + subroutine psb_d_diag_to_gpu(a,info, nzrm) + import :: psb_d_diag_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_diag_to_gpu + end interface + + interface + subroutine psb_d_cp_diag_from_coo(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_diag_from_coo + end interface + + interface + subroutine psb_d_cp_diag_from_fmt(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_diag_from_fmt + end interface + + interface + subroutine psb_d_mv_diag_from_coo(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_diag_from_coo + end interface + + + interface + subroutine psb_d_mv_diag_from_fmt(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_diag_from_fmt + end interface + + interface + subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_diag_csmv + end interface + interface + subroutine psb_d_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_diag_csmm + end interface + + interface + subroutine psb_d_diag_scal(d,a,info, side) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_diag_scal + end interface + + interface + subroutine psb_d_diag_scals(d,a,info) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_diag_sizeof(a) result(res) + implicit none + class(psb_d_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + psb_sizeof_dp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_diag_sizeof + + function d_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function d_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_dia_sparse_mat%free() + + return + + end subroutine d_diag_free + + subroutine d_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_d_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine d_diag_finalize + +#else + + interface + subroutine psb_d_diag_mold(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_diag_mold + end interface + +#endif + +end module psb_d_diag_mat_mod diff --git a/gpu/psb_d_dnsg_mat_mod.F90 b/gpu/psb_d_dnsg_mat_mod.F90 new file mode 100644 index 00000000..966c2311 --- /dev/null +++ b/gpu/psb_d_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_dnsg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_dns_mat_mod + use dnsdev_mod + + type, extends(psb_d_dns_sparse_mat) :: psb_d_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => d_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => d_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_d_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_d_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_d_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_d_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_d_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_d_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_d_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_d_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_d_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_dnsg_from_fmt + procedure, pass(a) :: free => d_dnsg_free + procedure, pass(a) :: mold => psb_d_dnsg_mold + procedure, pass(a) :: to_gpu => psb_d_dnsg_to_gpu + final :: d_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_dnsg_mold +#endif + end type psb_d_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_dnsg_get_nzeros, d_dnsg_free, d_dnsg_get_fmt, & + & d_dnsg_get_size, d_dnsg_get_nz_row + + + interface + subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_d_dnsg_mold(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dnsg_mold + end interface + + interface + subroutine psb_d_dnsg_to_gpu(a,info) + import :: psb_d_dnsg_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dnsg_to_gpu + end interface + + interface + subroutine psb_d_cp_dnsg_from_coo(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dnsg_from_coo + end interface + + interface + subroutine psb_d_cp_dnsg_from_fmt(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_d_mv_dnsg_from_coo(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_d_mv_dnsg_from_fmt(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_d_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_d_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_scal(d,a,info, side) +!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_scals(d,a,info) +!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function d_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function d_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_dns_sparse_mat%free() + + return + + end subroutine d_dnsg_free + + subroutine d_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_d_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine d_dnsg_finalize + +#else + + interface + subroutine psb_d_dnsg_mold(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dnsg_mold + end interface + +#endif + +end module psb_d_dnsg_mat_mod diff --git a/gpu/psb_d_elg_mat_mod.F90 b/gpu/psb_d_elg_mat_mod.F90 new file mode 100644 index 00000000..eac7bb36 --- /dev/null +++ b/gpu/psb_d_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_elg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_ell_sparse_mat) :: psb_d_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_elg_get_fmt + procedure, pass(a) :: sizeof => d_elg_sizeof + procedure, pass(a) :: vect_mv => psb_d_elg_vect_mv + procedure, pass(a) :: csmm => psb_d_elg_csmm + procedure, pass(a) :: csmv => psb_d_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_elg_scals + procedure, pass(a) :: scalv => psb_d_elg_scal + procedure, pass(a) :: reallocate_nz => psb_d_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_elg_allocate_mnnz + procedure, pass(a) :: reinit => d_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_elg_from_fmt + procedure, pass(a) :: free => d_elg_free + procedure, pass(a) :: mold => psb_d_elg_mold + procedure, pass(a) :: csput_a => psb_d_elg_csput_a + procedure, pass(a) :: csput_v => psb_d_elg_csput_v + procedure, pass(a) :: is_host => d_elg_is_host + procedure, pass(a) :: is_dev => d_elg_is_dev + procedure, pass(a) :: is_sync => d_elg_is_sync + procedure, pass(a) :: set_host => d_elg_set_host + procedure, pass(a) :: set_dev => d_elg_set_dev + procedure, pass(a) :: set_sync => d_elg_set_sync + procedure, pass(a) :: sync => d_elg_sync + procedure, pass(a) :: from_gpu => psb_d_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_elg_to_gpu + procedure, pass(a) :: asb => psb_d_elg_asb + final :: d_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_elg_mold + procedure, pass(a) :: asb => psb_d_elg_asb +#endif + end type psb_d_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_elg_get_nzeros, d_elg_free, d_elg_get_fmt, & + & d_elg_get_size, d_elg_sizeof, d_elg_get_nz_row, d_elg_sync + + + interface + subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_vect_mv + end interface + + interface + subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_inner_vect_sv + end interface + + interface + subroutine psb_d_elg_reallocate_nz(nz,a) + import :: psb_d_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_elg_reallocate_nz + end interface + + interface + subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) + import :: psb_d_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_elg_allocate_mnnz + end interface + + interface + subroutine psb_d_elg_mold(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_mold + end interface + + interface + subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_csput_a + end interface + + interface + subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,& + & psb_i_base_vect_type + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_csput_v + end interface + + interface + subroutine psb_d_elg_from_gpu(a,info) + import :: psb_d_elg_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_from_gpu + end interface + + interface + subroutine psb_d_elg_to_gpu(a,info, nzrm) + import :: psb_d_elg_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_elg_to_gpu + end interface + + interface + subroutine psb_d_cp_elg_from_coo(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_elg_from_coo + end interface + + interface + subroutine psb_d_cp_elg_from_fmt(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_elg_from_fmt + end interface + + interface + subroutine psb_d_mv_elg_from_coo(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_elg_from_coo + end interface + + + interface + subroutine psb_d_mv_elg_from_fmt(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_elg_from_fmt + end interface + + interface + subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_csmv + end interface + interface + subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_csmm + end interface + + interface + subroutine psb_d_elg_scal(d,a,info, side) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_elg_scal + end interface + + interface + subroutine psb_d_elg_scals(d,a,info) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_scals + end interface + + interface + subroutine psb_d_elg_asb(a) + import :: psb_d_elg_sparse_mat + class(psb_d_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_elg_sizeof(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_elg_sizeof + + function d_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function d_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine d_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = dzero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine d_elg_reinit + + subroutine d_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine d_elg_free + + subroutine d_elg_sync(a) + implicit none + class(psb_d_elg_sparse_mat), target, intent(in) :: a + class(psb_d_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_elg_sync + + subroutine d_elg_set_host(a) + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_elg_set_host + + subroutine d_elg_set_dev(a) + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_elg_set_dev + + subroutine d_elg_set_sync(a) + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_elg_set_sync + + function d_elg_is_dev(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_elg_is_dev + + function d_elg_is_host(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_elg_is_host + + function d_elg_is_sync(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_elg_is_sync + + subroutine d_elg_finalize(a) + use elldev_mod + implicit none + type(psb_d_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine d_elg_finalize + +#else + + interface + subroutine psb_d_elg_asb(a) + import :: psb_d_elg_sparse_mat + class(psb_d_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_elg_asb + end interface + + interface + subroutine psb_d_elg_mold(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_mold + end interface + +#endif + +end module psb_d_elg_mat_mod diff --git a/gpu/psb_d_gpu_vect_mod.F90 b/gpu/psb_d_gpu_vect_mod.F90 new file mode 100644 index 00000000..cd3757c3 --- /dev/null +++ b/gpu/psb_d_gpu_vect_mod.F90 @@ -0,0 +1,1989 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_d_vect_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_gpu_vect_mod + use psb_i_vectordev_mod + use psb_d_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_base_vect_type) :: psb_d_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + real(c_double), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => d_gpu_get_nrows + procedure, nopass :: get_fmt => d_gpu_get_fmt + + procedure, pass(x) :: all => d_gpu_all + procedure, pass(x) :: zero => d_gpu_zero + procedure, pass(x) :: asb_m => d_gpu_asb_m + procedure, pass(x) :: sync => d_gpu_sync + procedure, pass(x) :: sync_space => d_gpu_sync_space + procedure, pass(x) :: bld_x => d_gpu_bld_x + procedure, pass(x) :: bld_mn => d_gpu_bld_mn + procedure, pass(x) :: free => d_gpu_free + procedure, pass(x) :: ins_a => d_gpu_ins_a + procedure, pass(x) :: ins_v => d_gpu_ins_v + procedure, pass(x) :: is_host => d_gpu_is_host + procedure, pass(x) :: is_dev => d_gpu_is_dev + procedure, pass(x) :: is_sync => d_gpu_is_sync + procedure, pass(x) :: set_host => d_gpu_set_host + procedure, pass(x) :: set_dev => d_gpu_set_dev + procedure, pass(x) :: set_sync => d_gpu_set_sync + procedure, pass(x) :: set_scal => d_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => d_gpu_set_vect + procedure, pass(x) :: gthzv_x => d_gpu_gthzv_x + procedure, pass(y) :: sctb => d_gpu_sctb + procedure, pass(y) :: sctb_x => d_gpu_sctb_x + procedure, pass(x) :: gthzbuf => d_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => d_gpu_sctb_buf + procedure, pass(x) :: new_buffer => d_gpu_new_buffer + procedure, nopass :: device_wait => d_gpu_device_wait + procedure, pass(x) :: free_buffer => d_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => d_gpu_maybe_free_buffer + procedure, pass(x) :: dot_v => d_gpu_dot_v + procedure, pass(x) :: dot_a => d_gpu_dot_a + procedure, pass(y) :: axpby_v => d_gpu_axpby_v + procedure, pass(y) :: axpby_a => d_gpu_axpby_a + procedure, pass(y) :: mlt_v => d_gpu_mlt_v + procedure, pass(y) :: mlt_a => d_gpu_mlt_a + procedure, pass(z) :: mlt_a_2 => d_gpu_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => d_gpu_mlt_v_2 + procedure, pass(x) :: scal => d_gpu_scal + procedure, pass(x) :: nrm2 => d_gpu_nrm2 + procedure, pass(x) :: amax => d_gpu_amax + procedure, pass(x) :: asum => d_gpu_asum + procedure, pass(x) :: absval1 => d_gpu_absval1 + procedure, pass(x) :: absval2 => d_gpu_absval2 + + final :: d_gpu_vect_finalize +#endif + end type psb_d_vect_gpu + + public :: psb_d_vect_gpu_ + private :: constructor + interface psb_d_vect_gpu_ + module procedure constructor + end interface psb_d_vect_gpu_ + +contains + + function constructor(x) result(this) + real(psb_dpk_) :: x(:) + type(psb_d_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine d_gpu_device_wait() + call psb_cudaSync() + end subroutine d_gpu_device_wait + + subroutine d_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDouble(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDouble(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDouble(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDouble(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDouble(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceDouble(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDouble(x%dt_buf,y,n) + + end select + + end subroutine d_gpu_gthzv_x + + subroutine d_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_d_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readDouble(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceDouble(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readDouble(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine d_gpu_gthzbuf + + subroutine d_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_d_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine d_gpu_sctb + + subroutine d_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDouble(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDouble(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeDouble(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDouble(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDouble(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeDouble(y%dt_buf,x,n) + info = iscatMultiVecDeviceDouble(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine d_gpu_sctb_x + + subroutine d_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta + class(psb_d_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeDouble(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeDouble(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceDouble(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine d_gpu_sctb_buf + + + subroutine d_gpu_bld_x(x,this) + use psb_base_mod + real(psb_dpk_), intent(in) :: this(:) + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine d_gpu_bld_x + + subroutine d_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'d_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine d_gpu_bld_mn + + subroutine d_gpu_set_host(x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine d_gpu_set_host + + subroutine d_gpu_set_dev(x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine d_gpu_set_dev + + subroutine d_gpu_set_sync(x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine d_gpu_set_sync + + function d_gpu_is_dev(x) result(res) + implicit none + class(psb_d_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function d_gpu_is_dev + + function d_gpu_is_host(x) result(res) + implicit none + class(psb_d_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function d_gpu_is_host + + function d_gpu_is_sync(x) result(res) + implicit none + class(psb_d_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function d_gpu_is_sync + + + function d_gpu_get_nrows(x) result(res) + implicit none + class(psb_d_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function d_gpu_get_nrows + + function d_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'dGPU' + end function d_gpu_get_fmt + + subroutine d_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_d_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine d_gpu_all + + subroutine d_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine d_gpu_zero + + subroutine d_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_d_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'d_gpu_sync') + end if + + end subroutine d_gpu_sync + + subroutine d_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine d_gpu_free + + subroutine d_gpu_set_scal(x,val,first,last) + class(psb_d_vect_gpu), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine d_gpu_set_scal +!!$ +!!$ subroutine d_gpu_set_vect(x,val) +!!$ class(psb_d_vect_gpu), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_d_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine d_gpu_set_vect + + + + function d_gpu_dot_v(n,x,y) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + + res = dzero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_d_vect + ! + select type(yy => y) + type is (psb_d_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_d_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'d_gpu_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function d_gpu_dot_v + + function d_gpu_dot_a(n,x,y) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function d_gpu_dot_a + + subroutine d_gpu_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_gpu), intent(inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_d_vect_gpu) + ! Do something different here + if ((beta /= dzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + type is (psb_d_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_d_vect_gpu) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine d_gpu_mlt_v + + subroutine d_gpu_mlt_a(x, y, info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_gpu), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_d_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine d_gpu_mlt_a + + subroutine d_gpu_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: alpha,beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(in) :: y(:) + class(psb_d_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine d_gpu_mlt_a_2 + + subroutine d_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + real(psb_dpk_), intent(in) :: alpha,beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_d_vect_gpu) + select type (yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= dzero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= dzero).and.(z%is_dev())) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= dzero).and.(z%is_dev())) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine d_gpu_mlt_v_2 + + subroutine d_gpu_scal(alpha, x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + real(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine d_gpu_scal + + + function d_gpu_nrm2(n,x) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDevice(res,n,x%deviceVect) + + end function d_gpu_nrm2 + + function d_gpu_amax(n,x) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDevice(res,n,x%deviceVect) + + end function d_gpu_amax + + function d_gpu_asum(n,x) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDevice(res,n,x%deviceVect) + + end function d_gpu_asum + + subroutine d_gpu_absval1(x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,done,x%deviceVect) + + end subroutine d_gpu_absval1 + + subroutine d_gpu_absval2(x,y) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_d_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,done,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_d_base_vect_type%absval(y) + end select + end subroutine d_gpu_absval2 + + + subroutine d_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine d_gpu_vect_finalize + + subroutine d_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_d_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_d_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceDouble(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine d_gpu_ins_v + + subroutine d_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_d_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine d_gpu_ins_a + +#endif + +end module psb_d_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_d_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_d_multivect_mod + use psb_d_base_multivect_mod + + use psb_i_multivect_mod +#ifdef HAVE_SPGPU + use psb_i_gpu_multivect_mod + use psb_d_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_base_multivect_type) :: psb_d_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => d_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => d_gpu_multi_get_ncols + procedure, nopass :: get_fmt => d_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => d_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => d_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => d_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => d_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => d_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => d_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => d_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => d_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => d_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => d_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => d_gpu_multi_amax +!!$ procedure, pass(x) :: asum => d_gpu_multi_asum + procedure, pass(x) :: all => d_gpu_multi_all + procedure, pass(x) :: zero => d_gpu_multi_zero + procedure, pass(x) :: asb => d_gpu_multi_asb + procedure, pass(x) :: sync => d_gpu_multi_sync + procedure, pass(x) :: sync_space => d_gpu_multi_sync_space + procedure, pass(x) :: bld_x => d_gpu_multi_bld_x + procedure, pass(x) :: bld_n => d_gpu_multi_bld_n + procedure, pass(x) :: free => d_gpu_multi_free + procedure, pass(x) :: ins => d_gpu_multi_ins + procedure, pass(x) :: is_host => d_gpu_multi_is_host + procedure, pass(x) :: is_dev => d_gpu_multi_is_dev + procedure, pass(x) :: is_sync => d_gpu_multi_is_sync + procedure, pass(x) :: set_host => d_gpu_multi_set_host + procedure, pass(x) :: set_dev => d_gpu_multi_set_dev + procedure, pass(x) :: set_sync => d_gpu_multi_set_sync + procedure, pass(x) :: set_scal => d_gpu_multi_set_scal + procedure, pass(x) :: set_vect => d_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => d_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => d_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => d_gpu_multi_sctb_x + final :: d_gpu_multi_vect_finalize +#endif + end type psb_d_multivect_gpu + + public :: psb_d_multivect_gpu + private :: constructor + interface psb_d_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + real(psb_dpk_) :: x(:,:) + type(psb_d_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine d_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_dpk_) :: y(:) +!!$ class(psb_d_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine d_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine d_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_d_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine d_gpu_multi_sctb +!!$ +!!$ subroutine d_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine d_gpu_multi_sctb_x + + + subroutine d_gpu_multi_bld_x(x,this) + use psb_base_mod + real(psb_dpk_), intent(in) :: this(:,:) + class(psb_d_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine d_gpu_multi_bld_x + + subroutine d_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'d_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine d_gpu_multi_bld_n + + + subroutine d_gpu_multi_set_host(x) + implicit none + class(psb_d_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine d_gpu_multi_set_host + + subroutine d_gpu_multi_set_dev(x) + implicit none + class(psb_d_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine d_gpu_multi_set_dev + + subroutine d_gpu_multi_set_sync(x) + implicit none + class(psb_d_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine d_gpu_multi_set_sync + + function d_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function d_gpu_multi_is_dev + + function d_gpu_multi_is_host(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function d_gpu_multi_is_host + + function d_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function d_gpu_multi_is_sync + + + function d_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function d_gpu_multi_get_nrows + + function d_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function d_gpu_multi_get_ncols + + function d_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'dGPU' + end function d_gpu_multi_get_fmt + +!!$ function d_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_d_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_d_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_d_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'d_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function d_gpu_multi_dot_v +!!$ +!!$ function d_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function d_gpu_multi_dot_a +!!$ +!!$ subroutine d_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_d_base_multivect_type), intent(inout) :: x +!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ real(psb_dpk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_d_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_d_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_d_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_d_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine d_gpu_multi_mlt_v +!!$ +!!$ subroutine d_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_d_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine d_gpu_multi_mlt_a +!!$ +!!$ subroutine d_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha,beta +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine d_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine d_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha,beta +!!$ class(psb_d_base_multivect_type), intent(inout) :: x +!!$ class(psb_d_base_multivect_type), intent(inout) :: y +!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_d_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_d_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine d_gpu_multi_mlt_v_2 + + + subroutine d_gpu_multi_set_scal(x,val) + class(psb_d_multivect_gpu), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_d_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine d_gpu_multi_set_scal + + subroutine d_gpu_multi_set_vect(x,val) + class(psb_d_multivect_gpu), intent(inout) :: x + real(psb_dpk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_d_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine d_gpu_multi_set_vect + + + +!!$ subroutine d_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ real(psb_dpk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_d_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine d_gpu_multi_scal +!!$ +!!$ +!!$ function d_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function d_gpu_multi_nrm2 +!!$ +!!$ function d_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function d_gpu_multi_amax +!!$ +!!$ function d_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function d_gpu_multi_asum + + subroutine d_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=dzero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine d_gpu_multi_all + + subroutine d_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_d_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine d_gpu_multi_zero + + subroutine d_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_d_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc d_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => d_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_d_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_d_hdiag_csmm + procedure, pass(a) :: csmv => psb_d_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_d_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_d_hdiag_scals + ! procedure, pass(a) :: scalv => psb_d_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_d_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_hdiag_from_fmt + procedure, pass(a) :: free => d_hdiag_free + procedure, pass(a) :: mold => psb_d_hdiag_mold + procedure, pass(a) :: to_gpu => psb_d_hdiag_to_gpu + final :: d_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_hdiag_mold +#endif + end type psb_d_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_hdiag_get_nzeros, d_hdiag_free, d_hdiag_get_fmt, & + & d_hdiag_get_size, d_hdiag_sizeof, d_hdiag_get_nz_row + + + interface + subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_d_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_d_hdiag_mold(a,b,info) + import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdiag_mold + end interface + + interface + subroutine psb_d_hdiag_to_gpu(a,info) + import :: psb_d_hdiag_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdiag_to_gpu + end interface + + interface + subroutine psb_d_cp_hdiag_from_coo(a,b,info) + import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_d_mv_hdiag_from_coo(a,b,info) + import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_d_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_d_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdiag_scal(d,a,info, side) +!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdiag_scals(d,a,info) +!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function d_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function d_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hdia_sparse_mat%free() + + return + + end subroutine d_hdiag_free + + subroutine d_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_d_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hdia_sparse_mat%free() + + return + end subroutine d_hdiag_finalize + +#else + + interface + subroutine psb_d_hdiag_mold(a,b,info) + import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdiag_mold + end interface + +#endif + +end module psb_d_hdiag_mat_mod diff --git a/gpu/psb_d_hlg_mat_mod.F90 b/gpu/psb_d_hlg_mat_mod.F90 new file mode 100644 index 00000000..756d13aa --- /dev/null +++ b/gpu/psb_d_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_hlg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_hll_sparse_mat) :: psb_d_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_hlg_get_fmt + procedure, pass(a) :: sizeof => d_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_d_hlg_vect_mv + procedure, pass(a) :: csmm => psb_d_hlg_csmm + procedure, pass(a) :: csmv => psb_d_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_hlg_scals + procedure, pass(a) :: scalv => psb_d_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_d_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_hlg_from_fmt + procedure, pass(a) :: free => d_hlg_free + procedure, pass(a) :: mold => psb_d_hlg_mold + procedure, pass(a) :: is_host => d_hlg_is_host + procedure, pass(a) :: is_dev => d_hlg_is_dev + procedure, pass(a) :: is_sync => d_hlg_is_sync + procedure, pass(a) :: set_host => d_hlg_set_host + procedure, pass(a) :: set_dev => d_hlg_set_dev + procedure, pass(a) :: set_sync => d_hlg_set_sync + procedure, pass(a) :: sync => d_hlg_sync + procedure, pass(a) :: from_gpu => psb_d_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_hlg_to_gpu + final :: d_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_hlg_mold +#endif + end type psb_d_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_hlg_get_nzeros, d_hlg_free, d_hlg_get_fmt, & + & d_hlg_get_size, d_hlg_sizeof, d_hlg_get_nz_row + + + interface + subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hlg_vect_mv + end interface + + interface + subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hlg_inner_vect_sv + end interface + + interface + subroutine psb_d_hlg_reallocate_nz(nz,a) + import :: psb_d_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_d_hlg_reallocate_nz + end interface + + interface + subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_d_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hlg_allocate_mnnz + end interface + + interface + subroutine psb_d_hlg_mold(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hlg_mold + end interface + + interface + subroutine psb_d_hlg_from_gpu(a,info) + import :: psb_d_hlg_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hlg_from_gpu + end interface + + interface + subroutine psb_d_hlg_to_gpu(a,info, nzrm) + import :: psb_d_hlg_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_hlg_to_gpu + end interface + + interface + subroutine psb_d_cp_hlg_from_coo(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hlg_from_coo + end interface + + interface + subroutine psb_d_cp_hlg_from_fmt(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hlg_from_fmt + end interface + + interface + subroutine psb_d_mv_hlg_from_coo(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hlg_from_coo + end interface + + + interface + subroutine psb_d_mv_hlg_from_fmt(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hlg_from_fmt + end interface + + interface + subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hlg_csmv + end interface + interface + subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hlg_csmm + end interface + + interface + subroutine psb_d_hlg_scal(d,a,info, side) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_hlg_scal + end interface + + interface + subroutine psb_d_hlg_scals(d,a,info) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hlg_sizeof(a) result(res) + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_hlg_sizeof + + function d_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function d_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hll_sparse_mat%free() + + return + + end subroutine d_hlg_free + + + subroutine d_hlg_sync(a) + implicit none + class(psb_d_hlg_sparse_mat), target, intent(in) :: a + class(psb_d_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_hlg_sync + + subroutine d_hlg_set_host(a) + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_hlg_set_host + + subroutine d_hlg_set_dev(a) + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_hlg_set_dev + + subroutine d_hlg_set_sync(a) + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_hlg_set_sync + + function d_hlg_is_dev(a) result(res) + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_hlg_is_dev + + function d_hlg_is_host(a) result(res) + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_hlg_is_host + + function d_hlg_is_sync(a) result(res) + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_hlg_is_sync + + + subroutine d_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_d_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine d_hlg_finalize + +#else + + interface + subroutine psb_d_hlg_mold(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hlg_mold + end interface + +#endif + +end module psb_d_hlg_mat_mod diff --git a/gpu/psb_d_hybg_mat_mod.F90 b/gpu/psb_d_hybg_mat_mod.F90 new file mode 100644 index 00000000..d764daa7 --- /dev/null +++ b/gpu/psb_d_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_d_hybg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use cusparse_mod + + type, extends(psb_d_csr_sparse_mat) :: psb_d_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(d_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => d_hybg_get_fmt + procedure, pass(a) :: sizeof => d_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_d_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_hybg_csmm + procedure, pass(a) :: csmv => psb_d_hybg_csmv + procedure, pass(a) :: scals => psb_d_hybg_scals + procedure, pass(a) :: scalv => psb_d_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_d_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_hybg_from_fmt + procedure, pass(a) :: free => d_hybg_free + procedure, pass(a) :: mold => psb_d_hybg_mold + procedure, pass(a) :: to_gpu => psb_d_hybg_to_gpu + final :: d_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_hybg_mold +#endif + end type psb_d_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_hybg_get_nzeros, d_hybg_free, d_hybg_get_fmt, & + & d_hybg_get_size, d_hybg_sizeof, d_hybg_get_nz_row + + + interface + subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hybg_inner_vect_sv + end interface + + interface + subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hybg_vect_mv + end interface + + interface + subroutine psb_d_hybg_reallocate_nz(nz,a) + import :: psb_d_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_d_hybg_reallocate_nz + end interface + + interface + subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_d_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hybg_allocate_mnnz + end interface + + interface + subroutine psb_d_hybg_mold(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hybg_mold + end interface + + interface + subroutine psb_d_hybg_to_gpu(a,info, nzrm) + import :: psb_d_hybg_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_hybg_to_gpu + end interface + + interface + subroutine psb_d_cp_hybg_from_coo(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hybg_from_coo + end interface + + interface + subroutine psb_d_cp_hybg_from_fmt(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hybg_from_fmt + end interface + + interface + subroutine psb_d_mv_hybg_from_coo(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hybg_from_coo + end interface + + interface + subroutine psb_d_mv_hybg_from_fmt(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hybg_from_fmt + end interface + + interface + subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hybg_csmv + end interface + interface + subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hybg_csmm + end interface + + interface + subroutine psb_d_hybg_scal(d,a,info,side) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_hybg_scal + end interface + + interface + subroutine psb_d_hybg_scals(d,a,info) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hybg_sizeof(a) result(res) + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_hybg_sizeof + + function d_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function d_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_hybg_free + + subroutine d_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_d_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine d_hybg_finalize + +#else + + interface + subroutine psb_d_hybg_mold(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hybg_mold + end interface + +#endif + +end module psb_d_hybg_mat_mod +#endif diff --git a/gpu/psb_d_vectordev_mod.F90 b/gpu/psb_d_vectordev_mod.F90 new file mode 100644 index 00000000..cda0d9d7 --- /dev/null +++ b/gpu/psb_d_vectordev_mod.F90 @@ -0,0 +1,390 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedDouble(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + real(c_double), value :: dummy + end function registerMappedDouble + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceDouble(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + real(c_double) :: hostVec(*) + end function writeMultiVecDeviceDouble + function writeMultiVecDeviceDoubleR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceDoubleR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + real(c_double) :: hostVec(ld,*) + end function writeMultiVecDeviceDoubleR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceDouble(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + real(c_double) :: hostVec(*) + end function readMultiVecDeviceDouble + function readMultiVecDeviceDoubleR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceDoubleR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + real(c_double) :: hostVec(ld,*) + end function readMultiVecDeviceDoubleR2 + end interface + + interface allocateDouble + function allocateDouble(didx,n) & + & result(res) bind(c,name='allocateDouble') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateDouble + function allocateMultiDouble(didx,m,n) & + & result(res) bind(c,name='allocateMultiDouble') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiDouble + end interface + + interface writeDouble + function writeDouble(didx,hidx,n) & + & result(res) bind(c,name='writeDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(*) + integer(c_int),value :: n + end function writeDouble + function writeDoubleFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeDoubleFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeDoubleFirst + function writeMultiDouble(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiDouble + end interface + + interface readDouble + function readDouble(didx,hidx,n) & + & result(res) bind(c,name='readDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(*) + integer(c_int),value :: n + end function readDouble + function readDoubleFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readDoubleFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readDoubleFirst + function readMultiDouble(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiDouble + end interface + + interface + subroutine freeDouble(didx) & + & bind(c,name='freeDouble') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeDouble + end interface + + + interface setScalDevice + function setScalMultiVecDeviceDouble(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + real(c_double), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceDouble + end interface + + interface + function geinsMultiVecDeviceDouble(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceDouble + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceDouble(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceDouble + end interface + + interface + function igathMultiVecDeviceDoubleVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceDoubleVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceDoubleVecIdx + end interface + + interface + function iscatMultiVecDeviceDouble(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + real(c_double),value :: beta + end function iscatMultiVecDeviceDouble + end interface + + interface + function iscatMultiVecDeviceDoubleVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceDoubleVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + real(c_double),value :: beta + end function iscatMultiVecDeviceDoubleVecIdx + end interface + + + interface scalMultiVecDevice + function scalMultiVecDeviceDouble(alpha,deviceVecA) & + & result(val) bind(c,name='scalMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + real(c_double), value :: alpha + type(c_ptr), value :: deviceVecA + end function scalMultiVecDeviceDouble + end interface + + interface dotMultiVecDevice + function dotMultiVecDeviceDouble(res, n,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotMultiVecDeviceDouble + end interface + + interface nrm2MultiVecDevice + function nrm2MultiVecDeviceDouble(res,n,deviceVecA) & + & result(val) bind(c,name='nrm2MultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function nrm2MultiVecDeviceDouble + end interface + + interface amaxMultiVecDevice + function amaxMultiVecDeviceDouble(res,n,deviceVecA) & + & result(val) bind(c,name='amaxMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function amaxMultiVecDeviceDouble + end interface + + interface asumMultiVecDevice + function asumMultiVecDeviceDouble(res,n,deviceVecA) & + & result(val) bind(c,name='asumMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function asumMultiVecDeviceDouble + end interface + + + interface axpbyMultiVecDevice + function axpbyMultiVecDeviceDouble(n,alpha,deviceVecA,beta,deviceVecB) & + & result(res) bind(c,name='axpbyMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyMultiVecDeviceDouble + end interface + + interface axyMultiVecDevice + function axyMultiVecDeviceDouble(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='axyMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function axyMultiVecDeviceDouble + end interface + + interface axybzMultiVecDevice + function axybzMultiVecDeviceDouble(n,alpha,deviceVecA,deviceVecB,beta,deviceVecZ) & + & result(res) bind(c,name='axybzMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB,deviceVecZ + end function axybzMultiVecDeviceDouble + end interface + + + interface absMultiVecDevice + function absMultiVecDeviceDouble(n,alpha,deviceVecA) & + & result(res) bind(c,name='absMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha + type(c_ptr), value :: deviceVecA + end function absMultiVecDeviceDouble + function absMultiVecDeviceDouble2(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='absMultiVecDeviceDouble2') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function absMultiVecDeviceDouble2 + end interface + + interface inner_register + module procedure inner_registerDouble + end interface + + interface inner_unregister + module procedure inner_unregisterDouble + end interface + +contains + + + function inner_registerDouble(buffer,dval) result(res) + real(c_double), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + real(c_double) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerDouble + + subroutine inner_unregisterDouble(buffer) + real(c_double), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterDouble + +#endif + +end module psb_d_vectordev_mod diff --git a/gpu/psb_gpu_env_mod.F90 b/gpu/psb_gpu_env_mod.F90 new file mode 100644 index 00000000..0473f4ac --- /dev/null +++ b/gpu/psb_gpu_env_mod.F90 @@ -0,0 +1,340 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_gpu_env_mod + use psb_const_mod + use iso_c_binding + use base_cusparse_mod +! interface psb_gpu_init +! module procedure psb_gpu_init +! end interface +#if defined(HAVE_CUDA) + use core_mod + + interface + function psb_gpuGetHandle() & + & result(res) bind(c,name='psb_gpuGetHandle') + use iso_c_binding + type(c_ptr) :: res + end function psb_gpuGetHandle + end interface + + interface + function psb_gpuGetStream() & + & result(res) bind(c,name='psb_gpuGetStream') + use iso_c_binding + type(c_ptr) :: res + end function psb_gpuGetStream + end interface + + interface + function psb_C_gpu_init(dev) & + & result(res) bind(c,name='gpuInit') + use iso_c_binding + integer(c_int),value :: dev + integer(c_int) :: res + end function psb_C_gpu_init + end interface + + interface + function psb_cuda_getDeviceCount() & + & result(res) bind(c,name='getDeviceCount') + use iso_c_binding + integer(c_int) :: res + end function psb_cuda_getDeviceCount + end interface + + interface + function psb_cuda_getDevice() & + & result(res) bind(c,name='getDevice') + use iso_c_binding + integer(c_int) :: res + end function psb_cuda_getDevice + end interface + + interface + function psb_cuda_setDevice(dev) & + & result(res) bind(c,name='setDevice') + use iso_c_binding + integer(c_int), value :: dev + integer(c_int) :: res + end function psb_cuda_setDevice + end interface + + + interface + subroutine psb_gpuCreateHandle() & + & bind(c,name='psb_gpuCreateHandle') + use iso_c_binding + end subroutine psb_gpuCreateHandle + end interface + + interface + subroutine psb_gpuSetStream(handle,stream) & + & bind(c,name='psb_gpuSetStream') + use iso_c_binding + type(c_ptr), value :: handle, stream + end subroutine psb_gpuSetStream + end interface + + interface + subroutine psb_gpuDestroyHandle() & + & bind(c,name='psb_gpuDestroyHandle') + use iso_c_binding + end subroutine psb_gpuDestroyHandle + end interface + + interface + subroutine psb_cudaReset() & + & bind(c,name='cudaReset') + use iso_c_binding + end subroutine psb_cudaReset + end interface + + interface + subroutine psb_gpuClose() & + & bind(c,name='gpuClose') + use iso_c_binding + end subroutine psb_gpuClose + end interface +#endif + + interface + function psb_C_DeviceHasUVA() & + & result(res) bind(c,name='DeviceHasUVA') + use iso_c_binding + integer(c_int) :: res + end function psb_C_DeviceHasUVA + end interface + + interface + function psb_C_get_MultiProcessors() & + & result(res) bind(c,name='getGPUMultiProcessors') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MultiProcessors + function psb_C_get_MemoryBusWidth() & + & result(res) bind(c,name='getGPUMemoryBusWidth') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MemoryBusWidth + function psb_C_get_MemoryClockRate() & + & result(res) bind(c,name='getGPUMemoryClockRate') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MemoryClockRate + function psb_C_get_WarpSize() & + & result(res) bind(c,name='getGPUWarpSize') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_WarpSize + function psb_C_get_MaxThreadsPerMP() & + & result(res) bind(c,name='getGPUMaxThreadsPerMP') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MaxThreadsPerMP + function psb_C_get_MaxRegistersPerBlock() & + & result(res) bind(c,name='getGPUMaxRegistersPerBlock') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MaxRegistersPerBlock + end interface + interface + subroutine psb_C_cpy_NameString(cstring) & + & bind(c,name='cpyGPUNameString') + use iso_c_binding + character(c_char) :: cstring(*) + end subroutine psb_C_cpy_NameString + end interface + + logical, private :: gpu_do_maybe_free_buffer = .false. + +Contains + + function psb_gpu_get_maybe_free_buffer() result(res) + logical :: res + res = gpu_do_maybe_free_buffer + end function psb_gpu_get_maybe_free_buffer + + subroutine psb_gpu_set_maybe_free_buffer(val) + logical, intent(in) :: val + gpu_do_maybe_free_buffer = val + end subroutine psb_gpu_set_maybe_free_buffer + + ! !!!!!!!!!!!!!!!!!!!!!! + ! + ! Environment handling + ! + ! !!!!!!!!!!!!!!!!!!!!!! + + + subroutine psb_gpu_init(ctxt,dev) + use psb_penv_mod + use psb_const_mod + use psb_error_mod + type(psb_ctxt_type), intent(in) :: ctxt + integer, intent(in), optional :: dev + + integer :: np, npavail, iam, info, count, dev_ + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) +#if defined (HAVE_CUDA) +#if defined(SERIAL_MPI) + iam = 0 +#else + call psb_info(ctxt,iam,np) +#endif + + count = psb_cuda_getDeviceCount() + + if (present(dev)) then + info = psb_C_gpu_init(dev) + else + if (count >0) then + dev_ = mod(iam,count) + else + dev_ = 0 + end if + info = psb_C_gpu_init(dev_) + end if + if (info == 0) info = initFcusparse() + if (info /= 0) then + call psb_errpush(psb_err_internal_error_,'psb_gpu_init') + goto 9999 + end if + call psb_gpuCreateHandle() +#endif + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(ctxt,err_act) + + return + + end subroutine psb_gpu_init + + + subroutine psb_gpu_DeviceSync() +#if defined(HAVE_CUDA) + call psb_cudaSync() +#endif + end subroutine psb_gpu_DeviceSync + + function psb_gpu_getDeviceCount() result(res) + integer :: res +#if defined(HAVE_CUDA) + res = psb_cuda_getDeviceCount() +#else + res = 0 +#endif + end function psb_gpu_getDeviceCount + + subroutine psb_gpu_exit() + integer :: res + res = closeFcusparse() + call psb_gpuClose() + call psb_cudaReset() + end subroutine psb_gpu_exit + + function psb_gpu_DeviceHasUVA() result(res) + logical :: res + res = (psb_C_DeviceHasUVA() == 1) + end function psb_gpu_DeviceHasUVA + + function psb_gpu_MultiProcessors() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MultiProcessors() + end function psb_gpu_MultiProcessors + + function psb_gpu_MaxRegistersPerBlock() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MaxRegistersPerBlock() + end function psb_gpu_MaxRegistersPerBlock + + function psb_gpu_MaxThreadsPerMP() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MaxThreadsPerMP() + end function psb_gpu_MaxThreadsPerMP + + function psb_gpu_WarpSize() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_WarpSize() + end function psb_gpu_WarpSize + + function psb_gpu_MemoryClockRate() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MemoryClockRate() + end function psb_gpu_MemoryClockRate + + function psb_gpu_MemoryBusWidth() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MemoryBusWidth() + end function psb_gpu_MemoryBusWidth + + function psb_gpu_MemoryPeakBandwidth() result(res) + real(psb_dpk_) :: res + ! Formula here: 2*ClockRate(KHz)*BusWidth(bit) + ! normalization: bit/byte, KHz/MHz + ! output: MBytes/s + res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate() + end function psb_gpu_MemoryPeakBandwidth + + function psb_gpu_DeviceName() result(res) + character(len=256) :: res + character :: cstring(256) + call psb_C_cpy_NameString(cstring) + call stringc2f(cstring,res) + end function psb_gpu_DeviceName + + + subroutine stringc2f(cstring,fstring) + character(c_char) :: cstring(*) + character(len=*) :: fstring + integer :: i + + i = 1 + do + if (cstring(i) == c_null_char) exit + if (i > len(fstring)) exit + fstring(i:i) = cstring(i) + i = i + 1 + end do + do + if (i > len(fstring)) exit + fstring(i:i) = " " + i = i + 1 + end do + return + end subroutine stringc2f + +end module psb_gpu_env_mod diff --git a/gpu/psb_gpu_mod.F90 b/gpu/psb_gpu_mod.F90 new file mode 100644 index 00000000..7eba8062 --- /dev/null +++ b/gpu/psb_gpu_mod.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_gpu_mod + use psb_const_mod + use psb_gpu_env_mod + + use psb_i_gpu_vect_mod + use psb_s_gpu_vect_mod + use psb_d_gpu_vect_mod + use psb_c_gpu_vect_mod + use psb_z_gpu_vect_mod + + use psb_i_gpu_multivect_mod + use psb_s_gpu_multivect_mod + use psb_d_gpu_multivect_mod + use psb_c_gpu_multivect_mod + use psb_z_gpu_multivect_mod + + use psb_d_ell_mat_mod + use psb_d_elg_mat_mod + use psb_s_ell_mat_mod + use psb_s_elg_mat_mod + use psb_z_ell_mat_mod + use psb_z_elg_mat_mod + use psb_c_ell_mat_mod + use psb_c_elg_mat_mod + + use psb_s_hll_mat_mod + use psb_s_hlg_mat_mod + use psb_d_hll_mat_mod + use psb_d_hlg_mat_mod + use psb_c_hll_mat_mod + use psb_c_hlg_mat_mod + use psb_z_hll_mat_mod + use psb_z_hlg_mat_mod + + use psb_s_csrg_mat_mod + use psb_d_csrg_mat_mod + use psb_c_csrg_mat_mod + use psb_z_csrg_mat_mod +#if CUDA_SHORT_VERSION <= 10 + use psb_s_hybg_mat_mod + use psb_d_hybg_mat_mod + use psb_c_hybg_mat_mod + use psb_z_hybg_mat_mod +#endif + use psb_d_diag_mat_mod + use psb_d_hdiag_mat_mod + + use psb_s_dnsg_mat_mod + use psb_d_dnsg_mat_mod + use psb_c_dnsg_mat_mod + use psb_z_dnsg_mat_mod + + use psb_s_hdiag_mat_mod + ! use psb_s_diag_mat_mod + +end module psb_gpu_mod + diff --git a/gpu/psb_i_csrg_mat_mod.F90 b/gpu/psb_i_csrg_mat_mod.F90 new file mode 100644 index 00000000..de25370f --- /dev/null +++ b/gpu/psb_i_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_csrg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_csr_sparse_mat) :: psb_i_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(i_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_csrg_get_fmt + procedure, pass(a) :: sizeof => i_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_i_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_csrg_csmm + procedure, pass(a) :: csmv => psb_i_csrg_csmv + procedure, pass(a) :: scals => psb_i_csrg_scals + procedure, pass(a) :: scalv => psb_i_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_i_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_csrg_from_fmt + procedure, pass(a) :: free => i_csrg_free + procedure, pass(a) :: mold => psb_i_csrg_mold + procedure, pass(a) :: is_host => i_csrg_is_host + procedure, pass(a) :: is_dev => i_csrg_is_dev + procedure, pass(a) :: is_sync => i_csrg_is_sync + procedure, pass(a) :: set_host => i_csrg_set_host + procedure, pass(a) :: set_dev => i_csrg_set_dev + procedure, pass(a) :: set_sync => i_csrg_set_sync + procedure, pass(a) :: sync => i_csrg_sync + procedure, pass(a) :: to_gpu => psb_i_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_i_csrg_from_gpu + final :: i_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_csrg_mold +#endif + end type psb_i_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_csrg_get_nzeros, i_csrg_free, i_csrg_get_fmt, & + & i_csrg_get_size, i_csrg_sizeof, i_csrg_get_nz_row + + + interface + subroutine psb_i_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_i_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_csrg_vect_mv + end interface + + interface + subroutine psb_i_csrg_reallocate_nz(nz,a) + import :: psb_i_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_i_csrg_reallocate_nz + end interface + + interface + subroutine psb_i_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_i_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_csrg_allocate_mnnz + end interface + + interface + subroutine psb_i_csrg_mold(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_csrg_mold + end interface + + interface + subroutine psb_i_csrg_to_gpu(a,info, nzrm) + import :: psb_i_csrg_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_csrg_to_gpu + end interface + + interface + subroutine psb_i_csrg_from_gpu(a,info) + import :: psb_i_csrg_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_csrg_from_gpu + end interface + + interface + subroutine psb_i_cp_csrg_from_coo(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_csrg_from_coo + end interface + + interface + subroutine psb_i_cp_csrg_from_fmt(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_csrg_from_fmt + end interface + + interface + subroutine psb_i_mv_csrg_from_coo(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_csrg_from_coo + end interface + + interface + subroutine psb_i_mv_csrg_from_fmt(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_csrg_from_fmt + end interface + + interface + subroutine psb_i_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_csrg_csmv + end interface + interface + subroutine psb_i_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_csrg_csmm + end interface + + interface + subroutine psb_i_csrg_scal(d,a,info,side) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_csrg_scal + end interface + + interface + subroutine psb_i_csrg_scals(d,a,info) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_csrg_sizeof(a) result(res) + implicit none + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_csrg_sizeof + + function i_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function i_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine i_csrg_set_host(a) + implicit none + class(psb_i_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_csrg_set_host + + subroutine i_csrg_set_dev(a) + implicit none + class(psb_i_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_csrg_set_dev + + subroutine i_csrg_set_sync(a) + implicit none + class(psb_i_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_csrg_set_sync + + function i_csrg_is_dev(a) result(res) + implicit none + class(psb_i_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_csrg_is_dev + + function i_csrg_is_host(a) result(res) + implicit none + class(psb_i_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_csrg_is_host + + function i_csrg_is_sync(a) result(res) + implicit none + class(psb_i_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_csrg_is_sync + + + subroutine i_csrg_sync(a) + implicit none + class(psb_i_csrg_sparse_mat), target, intent(in) :: a + class(psb_i_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_csrg_sync + + subroutine i_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_i_csr_sparse_mat%free() + + return + + end subroutine i_csrg_free + + subroutine i_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_i_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine i_csrg_finalize + +#else + interface + subroutine psb_i_csrg_mold(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_csrg_mold + end interface + +#endif + +end module psb_i_csrg_mat_mod diff --git a/gpu/psb_i_diag_mat_mod.F90 b/gpu/psb_i_diag_mat_mod.F90 new file mode 100644 index 00000000..3559c09a --- /dev/null +++ b/gpu/psb_i_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_i_dia_mat_mod + + type, extends(psb_i_dia_sparse_mat) :: psb_i_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => i_diag_get_fmt + procedure, pass(a) :: sizeof => i_diag_sizeof + procedure, pass(a) :: vect_mv => psb_i_diag_vect_mv +! procedure, pass(a) :: csmm => psb_i_diag_csmm + procedure, pass(a) :: csmv => psb_i_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_i_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_i_diag_scals +! procedure, pass(a) :: scalv => psb_i_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_i_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_i_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_i_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_i_mv_diag_from_fmt + procedure, pass(a) :: free => i_diag_free + procedure, pass(a) :: mold => psb_i_diag_mold + procedure, pass(a) :: to_gpu => psb_i_diag_to_gpu + final :: i_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_diag_mold +#endif + end type psb_i_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_diag_get_nzeros, i_diag_free, i_diag_get_fmt, & + & i_diag_get_size, i_diag_sizeof, i_diag_get_nz_row + + + interface + subroutine psb_i_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_diag_vect_mv + end interface + + interface + subroutine psb_i_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_diag_inner_vect_sv + end interface + + interface + subroutine psb_i_diag_reallocate_nz(nz,a) + import :: psb_i_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_diag_sparse_mat), intent(inout) :: a + end subroutine psb_i_diag_reallocate_nz + end interface + + interface + subroutine psb_i_diag_allocate_mnnz(m,n,a,nz) + import :: psb_i_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_diag_allocate_mnnz + end interface + + interface + subroutine psb_i_diag_mold(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_diag_mold + end interface + + interface + subroutine psb_i_diag_to_gpu(a,info, nzrm) + import :: psb_i_diag_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_diag_to_gpu + end interface + + interface + subroutine psb_i_cp_diag_from_coo(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_diag_from_coo + end interface + + interface + subroutine psb_i_cp_diag_from_fmt(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_diag_from_fmt + end interface + + interface + subroutine psb_i_mv_diag_from_coo(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_diag_from_coo + end interface + + + interface + subroutine psb_i_mv_diag_from_fmt(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_diag_from_fmt + end interface + + interface + subroutine psb_i_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_diag_csmv + end interface + interface + subroutine psb_i_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_diag_csmm + end interface + + interface + subroutine psb_i_diag_scal(d,a,info, side) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_diag_scal + end interface + + interface + subroutine psb_i_diag_scals(d,a,info) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_diag_sizeof(a) result(res) + implicit none + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + psb_sizeof_int * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_diag_sizeof + + function i_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function i_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_dia_sparse_mat%free() + + return + + end subroutine i_diag_free + + subroutine i_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_i_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine i_diag_finalize + +#else + + interface + subroutine psb_i_diag_mold(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_diag_mold + end interface + +#endif + +end module psb_i_diag_mat_mod diff --git a/gpu/psb_i_dnsg_mat_mod.F90 b/gpu/psb_i_dnsg_mat_mod.F90 new file mode 100644 index 00000000..978996ae --- /dev/null +++ b/gpu/psb_i_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_dnsg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use psb_i_dns_mat_mod + use dnsdev_mod + + type, extends(psb_i_dns_sparse_mat) :: psb_i_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => i_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => i_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_i_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_i_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_i_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_i_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_i_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_i_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_i_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_i_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_i_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_dnsg_from_fmt + procedure, pass(a) :: free => i_dnsg_free + procedure, pass(a) :: mold => psb_i_dnsg_mold + procedure, pass(a) :: to_gpu => psb_i_dnsg_to_gpu + final :: i_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_dnsg_mold +#endif + end type psb_i_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_dnsg_get_nzeros, i_dnsg_free, i_dnsg_get_fmt, & + & i_dnsg_get_size, i_dnsg_get_nz_row + + + interface + subroutine psb_i_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_i_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta +!!$ class(psb_i_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_i_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_i_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_i_dnsg_mold(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_dnsg_mold + end interface + + interface + subroutine psb_i_dnsg_to_gpu(a,info) + import :: psb_i_dnsg_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_dnsg_to_gpu + end interface + + interface + subroutine psb_i_cp_dnsg_from_coo(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_dnsg_from_coo + end interface + + interface + subroutine psb_i_cp_dnsg_from_fmt(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_i_mv_dnsg_from_coo(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_i_mv_dnsg_from_fmt(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_i_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:) +!!$ integer(psb_ipk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_i_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) +!!$ integer(psb_ipk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_dnsg_scal(d,a,info, side) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_i_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_dnsg_scals(d,a,info) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function i_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function i_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_dns_sparse_mat%free() + + return + + end subroutine i_dnsg_free + + subroutine i_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_i_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine i_dnsg_finalize + +#else + + interface + subroutine psb_i_dnsg_mold(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_dnsg_mold + end interface + +#endif + +end module psb_i_dnsg_mat_mod diff --git a/gpu/psb_i_elg_mat_mod.F90 b/gpu/psb_i_elg_mat_mod.F90 new file mode 100644 index 00000000..afc71662 --- /dev/null +++ b/gpu/psb_i_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_elg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use psb_i_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_ell_sparse_mat) :: psb_i_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_elg_get_fmt + procedure, pass(a) :: sizeof => i_elg_sizeof + procedure, pass(a) :: vect_mv => psb_i_elg_vect_mv + procedure, pass(a) :: csmm => psb_i_elg_csmm + procedure, pass(a) :: csmv => psb_i_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_elg_scals + procedure, pass(a) :: scalv => psb_i_elg_scal + procedure, pass(a) :: reallocate_nz => psb_i_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_elg_allocate_mnnz + procedure, pass(a) :: reinit => i_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_elg_from_fmt + procedure, pass(a) :: free => i_elg_free + procedure, pass(a) :: mold => psb_i_elg_mold + procedure, pass(a) :: csput_a => psb_i_elg_csput_a + procedure, pass(a) :: csput_v => psb_i_elg_csput_v + procedure, pass(a) :: is_host => i_elg_is_host + procedure, pass(a) :: is_dev => i_elg_is_dev + procedure, pass(a) :: is_sync => i_elg_is_sync + procedure, pass(a) :: set_host => i_elg_set_host + procedure, pass(a) :: set_dev => i_elg_set_dev + procedure, pass(a) :: set_sync => i_elg_set_sync + procedure, pass(a) :: sync => i_elg_sync + procedure, pass(a) :: from_gpu => psb_i_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_elg_to_gpu + procedure, pass(a) :: asb => psb_i_elg_asb + final :: i_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_elg_mold + procedure, pass(a) :: asb => psb_i_elg_asb +#endif + end type psb_i_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_elg_get_nzeros, i_elg_free, i_elg_get_fmt, & + & i_elg_get_size, i_elg_sizeof, i_elg_get_nz_row, i_elg_sync + + + interface + subroutine psb_i_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_elg_vect_mv + end interface + + interface + subroutine psb_i_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_elg_inner_vect_sv + end interface + + interface + subroutine psb_i_elg_reallocate_nz(nz,a) + import :: psb_i_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_elg_reallocate_nz + end interface + + interface + subroutine psb_i_elg_allocate_mnnz(m,n,a,nz) + import :: psb_i_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_elg_allocate_mnnz + end interface + + interface + subroutine psb_i_elg_mold(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_mold + end interface + + interface + subroutine psb_i_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_csput_a + end interface + + interface + subroutine psb_i_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_i_base_vect_type,& + & psb_i_base_vect_type + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_csput_v + end interface + + interface + subroutine psb_i_elg_from_gpu(a,info) + import :: psb_i_elg_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_from_gpu + end interface + + interface + subroutine psb_i_elg_to_gpu(a,info, nzrm) + import :: psb_i_elg_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_elg_to_gpu + end interface + + interface + subroutine psb_i_cp_elg_from_coo(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_elg_from_coo + end interface + + interface + subroutine psb_i_cp_elg_from_fmt(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_elg_from_fmt + end interface + + interface + subroutine psb_i_mv_elg_from_coo(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_elg_from_coo + end interface + + + interface + subroutine psb_i_mv_elg_from_fmt(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_elg_from_fmt + end interface + + interface + subroutine psb_i_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_elg_csmv + end interface + interface + subroutine psb_i_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_elg_csmm + end interface + + interface + subroutine psb_i_elg_scal(d,a,info, side) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_elg_scal + end interface + + interface + subroutine psb_i_elg_scals(d,a,info) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_scals + end interface + + interface + subroutine psb_i_elg_asb(a) + import :: psb_i_elg_sparse_mat + class(psb_i_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_elg_sizeof(a) result(res) + implicit none + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_elg_sizeof + + function i_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function i_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine i_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = izero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine i_elg_reinit + + subroutine i_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine i_elg_free + + subroutine i_elg_sync(a) + implicit none + class(psb_i_elg_sparse_mat), target, intent(in) :: a + class(psb_i_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_elg_sync + + subroutine i_elg_set_host(a) + implicit none + class(psb_i_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_elg_set_host + + subroutine i_elg_set_dev(a) + implicit none + class(psb_i_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_elg_set_dev + + subroutine i_elg_set_sync(a) + implicit none + class(psb_i_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_elg_set_sync + + function i_elg_is_dev(a) result(res) + implicit none + class(psb_i_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_elg_is_dev + + function i_elg_is_host(a) result(res) + implicit none + class(psb_i_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_elg_is_host + + function i_elg_is_sync(a) result(res) + implicit none + class(psb_i_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_elg_is_sync + + subroutine i_elg_finalize(a) + use elldev_mod + implicit none + type(psb_i_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine i_elg_finalize + +#else + + interface + subroutine psb_i_elg_asb(a) + import :: psb_i_elg_sparse_mat + class(psb_i_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_elg_asb + end interface + + interface + subroutine psb_i_elg_mold(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_mold + end interface + +#endif + +end module psb_i_elg_mat_mod diff --git a/gpu/psb_i_gpu_vect_mod.F90 b/gpu/psb_i_gpu_vect_mod.F90 new file mode 100644 index 00000000..ca4950a0 --- /dev/null +++ b/gpu/psb_i_gpu_vect_mod.F90 @@ -0,0 +1,1671 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_vect_type) :: psb_i_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + integer(c_int), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + integer(c_int), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => i_gpu_get_nrows + procedure, nopass :: get_fmt => i_gpu_get_fmt + + procedure, pass(x) :: all => i_gpu_all + procedure, pass(x) :: zero => i_gpu_zero + procedure, pass(x) :: asb_m => i_gpu_asb_m + procedure, pass(x) :: sync => i_gpu_sync + procedure, pass(x) :: sync_space => i_gpu_sync_space + procedure, pass(x) :: bld_x => i_gpu_bld_x + procedure, pass(x) :: bld_mn => i_gpu_bld_mn + procedure, pass(x) :: free => i_gpu_free + procedure, pass(x) :: ins_a => i_gpu_ins_a + procedure, pass(x) :: ins_v => i_gpu_ins_v + procedure, pass(x) :: is_host => i_gpu_is_host + procedure, pass(x) :: is_dev => i_gpu_is_dev + procedure, pass(x) :: is_sync => i_gpu_is_sync + procedure, pass(x) :: set_host => i_gpu_set_host + procedure, pass(x) :: set_dev => i_gpu_set_dev + procedure, pass(x) :: set_sync => i_gpu_set_sync + procedure, pass(x) :: set_scal => i_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => i_gpu_set_vect + procedure, pass(x) :: gthzv_x => i_gpu_gthzv_x + procedure, pass(y) :: sctb => i_gpu_sctb + procedure, pass(y) :: sctb_x => i_gpu_sctb_x + procedure, pass(x) :: gthzbuf => i_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => i_gpu_sctb_buf + procedure, pass(x) :: new_buffer => i_gpu_new_buffer + procedure, nopass :: device_wait => i_gpu_device_wait + procedure, pass(x) :: free_buffer => i_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => i_gpu_maybe_free_buffer + + final :: i_gpu_vect_finalize +#endif + end type psb_i_vect_gpu + + public :: psb_i_vect_gpu_ + private :: constructor + interface psb_i_vect_gpu_ + module procedure constructor + end interface psb_i_vect_gpu_ + +contains + + function constructor(x) result(this) + integer(psb_ipk_) :: x(:) + type(psb_i_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine i_gpu_device_wait() + call psb_cudaSync() + end subroutine i_gpu_device_wait + + subroutine i_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeInt(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateInt(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readInt(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeInt(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateInt(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceInt(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readInt(x%dt_buf,y,n) + + end select + + end subroutine i_gpu_gthzv_x + + subroutine i_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_i_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readInt(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceInt(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readInt(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine i_gpu_gthzbuf + + subroutine i_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_i_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine i_gpu_sctb + + subroutine i_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeInt(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateInt(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeInt(y%dt_buf,x,n) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeInt(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateInt(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeInt(y%dt_buf,x,n) + info = iscatMultiVecDeviceInt(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine i_gpu_sctb_x + + subroutine i_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta + class(psb_i_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeInt(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeInt(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceInt(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine i_gpu_sctb_buf + + + subroutine i_gpu_bld_x(x,this) + use psb_base_mod + integer(psb_ipk_), intent(in) :: this(:) + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine i_gpu_bld_x + + subroutine i_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'i_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine i_gpu_bld_mn + + subroutine i_gpu_set_host(x) + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine i_gpu_set_host + + subroutine i_gpu_set_dev(x) + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine i_gpu_set_dev + + subroutine i_gpu_set_sync(x) + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine i_gpu_set_sync + + function i_gpu_is_dev(x) result(res) + implicit none + class(psb_i_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_gpu_is_dev + + function i_gpu_is_host(x) result(res) + implicit none + class(psb_i_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_gpu_is_host + + function i_gpu_is_sync(x) result(res) + implicit none + class(psb_i_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_gpu_is_sync + + + function i_gpu_get_nrows(x) result(res) + implicit none + class(psb_i_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function i_gpu_get_nrows + + function i_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'iGPU' + end function i_gpu_get_fmt + + subroutine i_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine i_gpu_all + + subroutine i_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=izero + call x%set_host() + end subroutine i_gpu_zero + + subroutine i_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_i_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'i_gpu_sync') + end if + + end subroutine i_gpu_sync + + subroutine i_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine i_gpu_free + + subroutine i_gpu_set_scal(x,val,first,last) + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine i_gpu_set_scal +!!$ +!!$ subroutine i_gpu_set_vect(x,val) +!!$ class(psb_i_vect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_i_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine i_gpu_set_vect + + + + subroutine i_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine i_gpu_vect_finalize + + subroutine i_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_i_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_i_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceInt(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine i_gpu_ins_v + + subroutine i_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_i_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine i_gpu_ins_a + +#endif + +end module psb_i_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_i_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_multivect_mod + use psb_i_base_multivect_mod + +#ifdef HAVE_SPGPU + use psb_i_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_multivect_type) :: psb_i_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => i_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => i_gpu_multi_get_ncols + procedure, nopass :: get_fmt => i_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => i_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => i_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => i_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => i_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => i_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => i_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => i_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => i_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => i_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => i_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => i_gpu_multi_amax +!!$ procedure, pass(x) :: asum => i_gpu_multi_asum + procedure, pass(x) :: all => i_gpu_multi_all + procedure, pass(x) :: zero => i_gpu_multi_zero + procedure, pass(x) :: asb => i_gpu_multi_asb + procedure, pass(x) :: sync => i_gpu_multi_sync + procedure, pass(x) :: sync_space => i_gpu_multi_sync_space + procedure, pass(x) :: bld_x => i_gpu_multi_bld_x + procedure, pass(x) :: bld_n => i_gpu_multi_bld_n + procedure, pass(x) :: free => i_gpu_multi_free + procedure, pass(x) :: ins => i_gpu_multi_ins + procedure, pass(x) :: is_host => i_gpu_multi_is_host + procedure, pass(x) :: is_dev => i_gpu_multi_is_dev + procedure, pass(x) :: is_sync => i_gpu_multi_is_sync + procedure, pass(x) :: set_host => i_gpu_multi_set_host + procedure, pass(x) :: set_dev => i_gpu_multi_set_dev + procedure, pass(x) :: set_sync => i_gpu_multi_set_sync + procedure, pass(x) :: set_scal => i_gpu_multi_set_scal + procedure, pass(x) :: set_vect => i_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => i_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => i_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => i_gpu_multi_sctb_x + final :: i_gpu_multi_vect_finalize +#endif + end type psb_i_multivect_gpu + + public :: psb_i_multivect_gpu + private :: constructor + interface psb_i_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + integer(psb_ipk_) :: x(:,:) + type(psb_i_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine i_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ integer(psb_ipk_) :: y(:) +!!$ class(psb_i_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine i_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine i_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ integer(psb_ipk_) :: beta, x(:) +!!$ class(psb_i_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_i_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine i_gpu_multi_sctb +!!$ +!!$ subroutine i_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ integer(psb_ipk_) :: beta, x(:) +!!$ class(psb_i_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine i_gpu_multi_sctb_x + + + subroutine i_gpu_multi_bld_x(x,this) + use psb_base_mod + integer(psb_ipk_), intent(in) :: this(:,:) + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine i_gpu_multi_bld_x + + subroutine i_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'i_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine i_gpu_multi_bld_n + + + subroutine i_gpu_multi_set_host(x) + implicit none + class(psb_i_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine i_gpu_multi_set_host + + subroutine i_gpu_multi_set_dev(x) + implicit none + class(psb_i_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine i_gpu_multi_set_dev + + subroutine i_gpu_multi_set_sync(x) + implicit none + class(psb_i_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine i_gpu_multi_set_sync + + function i_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_gpu_multi_is_dev + + function i_gpu_multi_is_host(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_gpu_multi_is_host + + function i_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_gpu_multi_is_sync + + + function i_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function i_gpu_multi_get_nrows + + function i_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function i_gpu_multi_get_ncols + + function i_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'iGPU' + end function i_gpu_multi_get_fmt + +!!$ function i_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_i_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_i_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_i_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'i_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function i_gpu_multi_dot_v +!!$ +!!$ function i_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function i_gpu_multi_dot_a +!!$ +!!$ subroutine i_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_i_base_multivect_type), intent(inout) :: x +!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_i_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_i_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_i_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_i_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine i_gpu_multi_mlt_v +!!$ +!!$ subroutine i_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: x(:) +!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_i_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine i_gpu_multi_mlt_a +!!$ +!!$ subroutine i_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: alpha,beta +!!$ integer(psb_ipk_), intent(in) :: x(:) +!!$ integer(psb_ipk_), intent(in) :: y(:) +!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine i_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine i_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: alpha,beta +!!$ class(psb_i_base_multivect_type), intent(inout) :: x +!!$ class(psb_i_base_multivect_type), intent(inout) :: y +!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_i_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_i_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine i_gpu_multi_mlt_v_2 + + + subroutine i_gpu_multi_set_scal(x,val) + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_i_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine i_gpu_multi_set_scal + + subroutine i_gpu_multi_set_vect(x,val) + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_i_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine i_gpu_multi_set_vect + + + +!!$ subroutine i_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_i_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine i_gpu_multi_scal +!!$ +!!$ +!!$ function i_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function i_gpu_multi_nrm2 +!!$ +!!$ function i_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function i_gpu_multi_amax +!!$ +!!$ function i_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function i_gpu_multi_asum + + subroutine i_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=izero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine i_gpu_multi_all + + subroutine i_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_i_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine i_gpu_multi_zero + + subroutine i_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_i_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc i_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => i_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_i_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_i_hdiag_csmm + procedure, pass(a) :: csmv => psb_i_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_i_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_i_hdiag_scals + ! procedure, pass(a) :: scalv => psb_i_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_i_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_i_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_i_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_i_mv_hdiag_from_fmt + procedure, pass(a) :: free => i_hdiag_free + procedure, pass(a) :: mold => psb_i_hdiag_mold + procedure, pass(a) :: to_gpu => psb_i_hdiag_to_gpu + final :: i_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_hdiag_mold +#endif + end type psb_i_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_hdiag_get_nzeros, i_hdiag_free, i_hdiag_get_fmt, & + & i_hdiag_get_size, i_hdiag_sizeof, i_hdiag_get_nz_row + + + interface + subroutine psb_i_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_i_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta +!!$ class(psb_i_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_i_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_i_hdiag_mold(a,b,info) + import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hdiag_mold + end interface + + interface + subroutine psb_i_hdiag_to_gpu(a,info) + import :: psb_i_hdiag_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hdiag_to_gpu + end interface + + interface + subroutine psb_i_cp_hdiag_from_coo(a,b,info) + import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_i_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_i_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_i_mv_hdiag_from_coo(a,b,info) + import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_i_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_i_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_i_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_i_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) +!!$ integer(psb_ipk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_hdiag_scal(d,a,info, side) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_i_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_hdiag_scals(d,a,info) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function i_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function i_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_hdia_sparse_mat%free() + + return + + end subroutine i_hdiag_free + + subroutine i_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_i_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_hdia_sparse_mat%free() + + return + end subroutine i_hdiag_finalize + +#else + + interface + subroutine psb_i_hdiag_mold(a,b,info) + import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hdiag_mold + end interface + +#endif + +end module psb_i_hdiag_mat_mod diff --git a/gpu/psb_i_hlg_mat_mod.F90 b/gpu/psb_i_hlg_mat_mod.F90 new file mode 100644 index 00000000..92917d47 --- /dev/null +++ b/gpu/psb_i_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_hlg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use psb_i_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_hll_sparse_mat) :: psb_i_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_hlg_get_fmt + procedure, pass(a) :: sizeof => i_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_i_hlg_vect_mv + procedure, pass(a) :: csmm => psb_i_hlg_csmm + procedure, pass(a) :: csmv => psb_i_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_hlg_scals + procedure, pass(a) :: scalv => psb_i_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_i_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_hlg_from_fmt + procedure, pass(a) :: free => i_hlg_free + procedure, pass(a) :: mold => psb_i_hlg_mold + procedure, pass(a) :: is_host => i_hlg_is_host + procedure, pass(a) :: is_dev => i_hlg_is_dev + procedure, pass(a) :: is_sync => i_hlg_is_sync + procedure, pass(a) :: set_host => i_hlg_set_host + procedure, pass(a) :: set_dev => i_hlg_set_dev + procedure, pass(a) :: set_sync => i_hlg_set_sync + procedure, pass(a) :: sync => i_hlg_sync + procedure, pass(a) :: from_gpu => psb_i_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_hlg_to_gpu + final :: i_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_hlg_mold +#endif + end type psb_i_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_hlg_get_nzeros, i_hlg_free, i_hlg_get_fmt, & + & i_hlg_get_size, i_hlg_sizeof, i_hlg_get_nz_row + + + interface + subroutine psb_i_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hlg_vect_mv + end interface + + interface + subroutine psb_i_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hlg_inner_vect_sv + end interface + + interface + subroutine psb_i_hlg_reallocate_nz(nz,a) + import :: psb_i_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_i_hlg_reallocate_nz + end interface + + interface + subroutine psb_i_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_i_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_hlg_allocate_mnnz + end interface + + interface + subroutine psb_i_hlg_mold(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hlg_mold + end interface + + interface + subroutine psb_i_hlg_from_gpu(a,info) + import :: psb_i_hlg_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hlg_from_gpu + end interface + + interface + subroutine psb_i_hlg_to_gpu(a,info, nzrm) + import :: psb_i_hlg_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_hlg_to_gpu + end interface + + interface + subroutine psb_i_cp_hlg_from_coo(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hlg_from_coo + end interface + + interface + subroutine psb_i_cp_hlg_from_fmt(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hlg_from_fmt + end interface + + interface + subroutine psb_i_mv_hlg_from_coo(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hlg_from_coo + end interface + + + interface + subroutine psb_i_mv_hlg_from_fmt(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hlg_from_fmt + end interface + + interface + subroutine psb_i_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hlg_csmv + end interface + interface + subroutine psb_i_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hlg_csmm + end interface + + interface + subroutine psb_i_hlg_scal(d,a,info, side) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_hlg_scal + end interface + + interface + subroutine psb_i_hlg_scals(d,a,info) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_hlg_sizeof(a) result(res) + implicit none + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_hlg_sizeof + + function i_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function i_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_hll_sparse_mat%free() + + return + + end subroutine i_hlg_free + + + subroutine i_hlg_sync(a) + implicit none + class(psb_i_hlg_sparse_mat), target, intent(in) :: a + class(psb_i_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_hlg_sync + + subroutine i_hlg_set_host(a) + implicit none + class(psb_i_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_hlg_set_host + + subroutine i_hlg_set_dev(a) + implicit none + class(psb_i_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_hlg_set_dev + + subroutine i_hlg_set_sync(a) + implicit none + class(psb_i_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_hlg_set_sync + + function i_hlg_is_dev(a) result(res) + implicit none + class(psb_i_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_hlg_is_dev + + function i_hlg_is_host(a) result(res) + implicit none + class(psb_i_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_hlg_is_host + + function i_hlg_is_sync(a) result(res) + implicit none + class(psb_i_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_hlg_is_sync + + + subroutine i_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_i_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine i_hlg_finalize + +#else + + interface + subroutine psb_i_hlg_mold(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hlg_mold + end interface + +#endif + +end module psb_i_hlg_mat_mod diff --git a/gpu/psb_i_hybg_mat_mod.F90 b/gpu/psb_i_hybg_mat_mod.F90 new file mode 100644 index 00000000..9e682365 --- /dev/null +++ b/gpu/psb_i_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_i_hybg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use cusparse_mod + + type, extends(psb_i_csr_sparse_mat) :: psb_i_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(i_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => i_hybg_get_fmt + procedure, pass(a) :: sizeof => i_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_i_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_hybg_csmm + procedure, pass(a) :: csmv => psb_i_hybg_csmv + procedure, pass(a) :: scals => psb_i_hybg_scals + procedure, pass(a) :: scalv => psb_i_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_i_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_hybg_from_fmt + procedure, pass(a) :: free => i_hybg_free + procedure, pass(a) :: mold => psb_i_hybg_mold + procedure, pass(a) :: to_gpu => psb_i_hybg_to_gpu + final :: i_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_hybg_mold +#endif + end type psb_i_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_hybg_get_nzeros, i_hybg_free, i_hybg_get_fmt, & + & i_hybg_get_size, i_hybg_sizeof, i_hybg_get_nz_row + + + interface + subroutine psb_i_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hybg_inner_vect_sv + end interface + + interface + subroutine psb_i_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hybg_vect_mv + end interface + + interface + subroutine psb_i_hybg_reallocate_nz(nz,a) + import :: psb_i_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_i_hybg_reallocate_nz + end interface + + interface + subroutine psb_i_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_i_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_hybg_allocate_mnnz + end interface + + interface + subroutine psb_i_hybg_mold(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hybg_mold + end interface + + interface + subroutine psb_i_hybg_to_gpu(a,info, nzrm) + import :: psb_i_hybg_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_hybg_to_gpu + end interface + + interface + subroutine psb_i_cp_hybg_from_coo(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hybg_from_coo + end interface + + interface + subroutine psb_i_cp_hybg_from_fmt(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hybg_from_fmt + end interface + + interface + subroutine psb_i_mv_hybg_from_coo(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hybg_from_coo + end interface + + interface + subroutine psb_i_mv_hybg_from_fmt(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hybg_from_fmt + end interface + + interface + subroutine psb_i_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hybg_csmv + end interface + interface + subroutine psb_i_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hybg_csmm + end interface + + interface + subroutine psb_i_hybg_scal(d,a,info,side) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_hybg_scal + end interface + + interface + subroutine psb_i_hybg_scals(d,a,info) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_hybg_sizeof(a) result(res) + implicit none + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_hybg_sizeof + + function i_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function i_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_i_csr_sparse_mat%free() + + return + + end subroutine i_hybg_free + + subroutine i_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_i_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine i_hybg_finalize + +#else + + interface + subroutine psb_i_hybg_mold(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hybg_mold + end interface + +#endif + +end module psb_i_hybg_mat_mod +#endif diff --git a/gpu/psb_i_vectordev_mod.F90 b/gpu/psb_i_vectordev_mod.F90 new file mode 100644 index 00000000..9998d355 --- /dev/null +++ b/gpu/psb_i_vectordev_mod.F90 @@ -0,0 +1,283 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedInt(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + integer(c_int), value :: dummy + end function registerMappedInt + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceInt(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int) :: hostVec(*) + end function writeMultiVecDeviceInt + function writeMultiVecDeviceIntR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceIntR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + integer(c_int) :: hostVec(ld,*) + end function writeMultiVecDeviceIntR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceInt(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int) :: hostVec(*) + end function readMultiVecDeviceInt + function readMultiVecDeviceIntR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceIntR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + integer(c_int) :: hostVec(ld,*) + end function readMultiVecDeviceIntR2 + end interface + + interface allocateInt + function allocateInt(didx,n) & + & result(res) bind(c,name='allocateInt') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateInt + function allocateMultiInt(didx,m,n) & + & result(res) bind(c,name='allocateMultiInt') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiInt + end interface + + interface writeInt + function writeInt(didx,hidx,n) & + & result(res) bind(c,name='writeInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(*) + integer(c_int),value :: n + end function writeInt + function writeIntFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeIntFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeIntFirst + function writeMultiInt(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiInt + end interface + + interface readInt + function readInt(didx,hidx,n) & + & result(res) bind(c,name='readInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(*) + integer(c_int),value :: n + end function readInt + function readIntFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readIntFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readIntFirst + function readMultiInt(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiInt + end interface + + interface + subroutine freeInt(didx) & + & bind(c,name='freeInt') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeInt + end interface + + + interface setScalDevice + function setScalMultiVecDeviceInt(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + integer(c_int), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceInt + end interface + + interface + function geinsMultiVecDeviceInt(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceInt + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceInt(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceInt + end interface + + interface + function igathMultiVecDeviceIntVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceIntVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceIntVecIdx + end interface + + interface + function iscatMultiVecDeviceInt(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + integer(c_int),value :: beta + end function iscatMultiVecDeviceInt + end interface + + interface + function iscatMultiVecDeviceIntVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceIntVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + integer(c_int),value :: beta + end function iscatMultiVecDeviceIntVecIdx + end interface + + + + interface inner_register + module procedure inner_registerInt + end interface + + interface inner_unregister + module procedure inner_unregisterInt + end interface + +contains + + + function inner_registerInt(buffer,dval) result(res) + integer(c_int), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + integer(c_int) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerInt + + subroutine inner_unregisterInt(buffer) + integer(c_int), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterInt + +#endif + +end module psb_i_vectordev_mod diff --git a/gpu/psb_s_csrg_mat_mod.F90 b/gpu/psb_s_csrg_mat_mod.F90 new file mode 100644 index 00000000..cface9f5 --- /dev/null +++ b/gpu/psb_s_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_csrg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_csr_sparse_mat) :: psb_s_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(s_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_csrg_get_fmt + procedure, pass(a) :: sizeof => s_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_s_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_csrg_csmm + procedure, pass(a) :: csmv => psb_s_csrg_csmv + procedure, pass(a) :: scals => psb_s_csrg_scals + procedure, pass(a) :: scalv => psb_s_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_s_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_csrg_from_fmt + procedure, pass(a) :: free => s_csrg_free + procedure, pass(a) :: mold => psb_s_csrg_mold + procedure, pass(a) :: is_host => s_csrg_is_host + procedure, pass(a) :: is_dev => s_csrg_is_dev + procedure, pass(a) :: is_sync => s_csrg_is_sync + procedure, pass(a) :: set_host => s_csrg_set_host + procedure, pass(a) :: set_dev => s_csrg_set_dev + procedure, pass(a) :: set_sync => s_csrg_set_sync + procedure, pass(a) :: sync => s_csrg_sync + procedure, pass(a) :: to_gpu => psb_s_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_s_csrg_from_gpu + final :: s_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_csrg_mold +#endif + end type psb_s_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_csrg_get_nzeros, s_csrg_free, s_csrg_get_fmt, & + & s_csrg_get_size, s_csrg_sizeof, s_csrg_get_nz_row + + + interface + subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrg_vect_mv + end interface + + interface + subroutine psb_s_csrg_reallocate_nz(nz,a) + import :: psb_s_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_s_csrg_reallocate_nz + end interface + + interface + subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_s_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_csrg_allocate_mnnz + end interface + + interface + subroutine psb_s_csrg_mold(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrg_mold + end interface + + interface + subroutine psb_s_csrg_to_gpu(a,info, nzrm) + import :: psb_s_csrg_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_csrg_to_gpu + end interface + + interface + subroutine psb_s_csrg_from_gpu(a,info) + import :: psb_s_csrg_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrg_from_gpu + end interface + + interface + subroutine psb_s_cp_csrg_from_coo(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_csrg_from_coo + end interface + + interface + subroutine psb_s_cp_csrg_from_fmt(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_csrg_from_fmt + end interface + + interface + subroutine psb_s_mv_csrg_from_coo(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_csrg_from_coo + end interface + + interface + subroutine psb_s_mv_csrg_from_fmt(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_csrg_from_fmt + end interface + + interface + subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrg_csmv + end interface + interface + subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrg_csmm + end interface + + interface + subroutine psb_s_csrg_scal(d,a,info,side) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_csrg_scal + end interface + + interface + subroutine psb_s_csrg_scals(d,a,info) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_csrg_sizeof(a) result(res) + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_csrg_sizeof + + function s_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function s_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine s_csrg_set_host(a) + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_csrg_set_host + + subroutine s_csrg_set_dev(a) + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_csrg_set_dev + + subroutine s_csrg_set_sync(a) + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_csrg_set_sync + + function s_csrg_is_dev(a) result(res) + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_csrg_is_dev + + function s_csrg_is_host(a) result(res) + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_csrg_is_host + + function s_csrg_is_sync(a) result(res) + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_csrg_is_sync + + + subroutine s_csrg_sync(a) + implicit none + class(psb_s_csrg_sparse_mat), target, intent(in) :: a + class(psb_s_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_csrg_sync + + subroutine s_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_csrg_free + + subroutine s_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_s_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine s_csrg_finalize + +#else + interface + subroutine psb_s_csrg_mold(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrg_mold + end interface + +#endif + +end module psb_s_csrg_mat_mod diff --git a/gpu/psb_s_diag_mat_mod.F90 b/gpu/psb_s_diag_mat_mod.F90 new file mode 100644 index 00000000..1ed54f88 --- /dev/null +++ b/gpu/psb_s_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_s_dia_mat_mod + + type, extends(psb_s_dia_sparse_mat) :: psb_s_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => s_diag_get_fmt + procedure, pass(a) :: sizeof => s_diag_sizeof + procedure, pass(a) :: vect_mv => psb_s_diag_vect_mv +! procedure, pass(a) :: csmm => psb_s_diag_csmm + procedure, pass(a) :: csmv => psb_s_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_s_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_s_diag_scals +! procedure, pass(a) :: scalv => psb_s_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_s_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_s_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_s_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_s_mv_diag_from_fmt + procedure, pass(a) :: free => s_diag_free + procedure, pass(a) :: mold => psb_s_diag_mold + procedure, pass(a) :: to_gpu => psb_s_diag_to_gpu + final :: s_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_diag_mold +#endif + end type psb_s_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_diag_get_nzeros, s_diag_free, s_diag_get_fmt, & + & s_diag_get_size, s_diag_sizeof, s_diag_get_nz_row + + + interface + subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_diag_vect_mv + end interface + + interface + subroutine psb_s_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_diag_inner_vect_sv + end interface + + interface + subroutine psb_s_diag_reallocate_nz(nz,a) + import :: psb_s_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_diag_sparse_mat), intent(inout) :: a + end subroutine psb_s_diag_reallocate_nz + end interface + + interface + subroutine psb_s_diag_allocate_mnnz(m,n,a,nz) + import :: psb_s_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_diag_allocate_mnnz + end interface + + interface + subroutine psb_s_diag_mold(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_diag_mold + end interface + + interface + subroutine psb_s_diag_to_gpu(a,info, nzrm) + import :: psb_s_diag_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_diag_to_gpu + end interface + + interface + subroutine psb_s_cp_diag_from_coo(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_diag_from_coo + end interface + + interface + subroutine psb_s_cp_diag_from_fmt(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_diag_from_fmt + end interface + + interface + subroutine psb_s_mv_diag_from_coo(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_diag_from_coo + end interface + + + interface + subroutine psb_s_mv_diag_from_fmt(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_diag_from_fmt + end interface + + interface + subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_diag_csmv + end interface + interface + subroutine psb_s_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_diag_csmm + end interface + + interface + subroutine psb_s_diag_scal(d,a,info, side) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_diag_scal + end interface + + interface + subroutine psb_s_diag_scals(d,a,info) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_diag_sizeof(a) result(res) + implicit none + class(psb_s_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + psb_sizeof_sp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_diag_sizeof + + function s_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function s_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_dia_sparse_mat%free() + + return + + end subroutine s_diag_free + + subroutine s_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_s_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine s_diag_finalize + +#else + + interface + subroutine psb_s_diag_mold(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_diag_mold + end interface + +#endif + +end module psb_s_diag_mat_mod diff --git a/gpu/psb_s_dnsg_mat_mod.F90 b/gpu/psb_s_dnsg_mat_mod.F90 new file mode 100644 index 00000000..1c531463 --- /dev/null +++ b/gpu/psb_s_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_dnsg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_dns_mat_mod + use dnsdev_mod + + type, extends(psb_s_dns_sparse_mat) :: psb_s_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => s_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => s_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_s_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_s_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_s_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_s_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_s_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_s_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_s_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_s_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_s_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_dnsg_from_fmt + procedure, pass(a) :: free => s_dnsg_free + procedure, pass(a) :: mold => psb_s_dnsg_mold + procedure, pass(a) :: to_gpu => psb_s_dnsg_to_gpu + final :: s_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_dnsg_mold +#endif + end type psb_s_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_dnsg_get_nzeros, s_dnsg_free, s_dnsg_get_fmt, & + & s_dnsg_get_size, s_dnsg_get_nz_row + + + interface + subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_s_dnsg_mold(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dnsg_mold + end interface + + interface + subroutine psb_s_dnsg_to_gpu(a,info) + import :: psb_s_dnsg_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dnsg_to_gpu + end interface + + interface + subroutine psb_s_cp_dnsg_from_coo(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dnsg_from_coo + end interface + + interface + subroutine psb_s_cp_dnsg_from_fmt(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_s_mv_dnsg_from_coo(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_s_mv_dnsg_from_fmt(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_s_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_s_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_scal(d,a,info, side) +!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_s_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_scals(d,a,info) +!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function s_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function s_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_dns_sparse_mat%free() + + return + + end subroutine s_dnsg_free + + subroutine s_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_s_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine s_dnsg_finalize + +#else + + interface + subroutine psb_s_dnsg_mold(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dnsg_mold + end interface + +#endif + +end module psb_s_dnsg_mat_mod diff --git a/gpu/psb_s_elg_mat_mod.F90 b/gpu/psb_s_elg_mat_mod.F90 new file mode 100644 index 00000000..5c4eae9b --- /dev/null +++ b/gpu/psb_s_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_elg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_ell_sparse_mat) :: psb_s_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_elg_get_fmt + procedure, pass(a) :: sizeof => s_elg_sizeof + procedure, pass(a) :: vect_mv => psb_s_elg_vect_mv + procedure, pass(a) :: csmm => psb_s_elg_csmm + procedure, pass(a) :: csmv => psb_s_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_elg_scals + procedure, pass(a) :: scalv => psb_s_elg_scal + procedure, pass(a) :: reallocate_nz => psb_s_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_elg_allocate_mnnz + procedure, pass(a) :: reinit => s_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_elg_from_fmt + procedure, pass(a) :: free => s_elg_free + procedure, pass(a) :: mold => psb_s_elg_mold + procedure, pass(a) :: csput_a => psb_s_elg_csput_a + procedure, pass(a) :: csput_v => psb_s_elg_csput_v + procedure, pass(a) :: is_host => s_elg_is_host + procedure, pass(a) :: is_dev => s_elg_is_dev + procedure, pass(a) :: is_sync => s_elg_is_sync + procedure, pass(a) :: set_host => s_elg_set_host + procedure, pass(a) :: set_dev => s_elg_set_dev + procedure, pass(a) :: set_sync => s_elg_set_sync + procedure, pass(a) :: sync => s_elg_sync + procedure, pass(a) :: from_gpu => psb_s_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_elg_to_gpu + procedure, pass(a) :: asb => psb_s_elg_asb + final :: s_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_elg_mold + procedure, pass(a) :: asb => psb_s_elg_asb +#endif + end type psb_s_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_elg_get_nzeros, s_elg_free, s_elg_get_fmt, & + & s_elg_get_size, s_elg_sizeof, s_elg_get_nz_row, s_elg_sync + + + interface + subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_elg_vect_mv + end interface + + interface + subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_elg_inner_vect_sv + end interface + + interface + subroutine psb_s_elg_reallocate_nz(nz,a) + import :: psb_s_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_elg_reallocate_nz + end interface + + interface + subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) + import :: psb_s_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_elg_allocate_mnnz + end interface + + interface + subroutine psb_s_elg_mold(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_mold + end interface + + interface + subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_csput_a + end interface + + interface + subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_s_base_vect_type,& + & psb_i_base_vect_type + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_csput_v + end interface + + interface + subroutine psb_s_elg_from_gpu(a,info) + import :: psb_s_elg_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_from_gpu + end interface + + interface + subroutine psb_s_elg_to_gpu(a,info, nzrm) + import :: psb_s_elg_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_elg_to_gpu + end interface + + interface + subroutine psb_s_cp_elg_from_coo(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_elg_from_coo + end interface + + interface + subroutine psb_s_cp_elg_from_fmt(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_elg_from_fmt + end interface + + interface + subroutine psb_s_mv_elg_from_coo(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_elg_from_coo + end interface + + + interface + subroutine psb_s_mv_elg_from_fmt(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_elg_from_fmt + end interface + + interface + subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_elg_csmv + end interface + interface + subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_elg_csmm + end interface + + interface + subroutine psb_s_elg_scal(d,a,info, side) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_elg_scal + end interface + + interface + subroutine psb_s_elg_scals(d,a,info) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_scals + end interface + + interface + subroutine psb_s_elg_asb(a) + import :: psb_s_elg_sparse_mat + class(psb_s_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_elg_sizeof(a) result(res) + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_elg_sizeof + + function s_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function s_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine s_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = szero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine s_elg_reinit + + subroutine s_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine s_elg_free + + subroutine s_elg_sync(a) + implicit none + class(psb_s_elg_sparse_mat), target, intent(in) :: a + class(psb_s_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_elg_sync + + subroutine s_elg_set_host(a) + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_elg_set_host + + subroutine s_elg_set_dev(a) + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_elg_set_dev + + subroutine s_elg_set_sync(a) + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_elg_set_sync + + function s_elg_is_dev(a) result(res) + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_elg_is_dev + + function s_elg_is_host(a) result(res) + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_elg_is_host + + function s_elg_is_sync(a) result(res) + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_elg_is_sync + + subroutine s_elg_finalize(a) + use elldev_mod + implicit none + type(psb_s_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine s_elg_finalize + +#else + + interface + subroutine psb_s_elg_asb(a) + import :: psb_s_elg_sparse_mat + class(psb_s_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_elg_asb + end interface + + interface + subroutine psb_s_elg_mold(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_mold + end interface + +#endif + +end module psb_s_elg_mat_mod diff --git a/gpu/psb_s_gpu_vect_mod.F90 b/gpu/psb_s_gpu_vect_mod.F90 new file mode 100644 index 00000000..1371db53 --- /dev/null +++ b/gpu/psb_s_gpu_vect_mod.F90 @@ -0,0 +1,1989 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_s_vect_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_gpu_vect_mod + use psb_i_vectordev_mod + use psb_s_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_vect_type) :: psb_s_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + real(c_float), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + real(c_float), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => s_gpu_get_nrows + procedure, nopass :: get_fmt => s_gpu_get_fmt + + procedure, pass(x) :: all => s_gpu_all + procedure, pass(x) :: zero => s_gpu_zero + procedure, pass(x) :: asb_m => s_gpu_asb_m + procedure, pass(x) :: sync => s_gpu_sync + procedure, pass(x) :: sync_space => s_gpu_sync_space + procedure, pass(x) :: bld_x => s_gpu_bld_x + procedure, pass(x) :: bld_mn => s_gpu_bld_mn + procedure, pass(x) :: free => s_gpu_free + procedure, pass(x) :: ins_a => s_gpu_ins_a + procedure, pass(x) :: ins_v => s_gpu_ins_v + procedure, pass(x) :: is_host => s_gpu_is_host + procedure, pass(x) :: is_dev => s_gpu_is_dev + procedure, pass(x) :: is_sync => s_gpu_is_sync + procedure, pass(x) :: set_host => s_gpu_set_host + procedure, pass(x) :: set_dev => s_gpu_set_dev + procedure, pass(x) :: set_sync => s_gpu_set_sync + procedure, pass(x) :: set_scal => s_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => s_gpu_set_vect + procedure, pass(x) :: gthzv_x => s_gpu_gthzv_x + procedure, pass(y) :: sctb => s_gpu_sctb + procedure, pass(y) :: sctb_x => s_gpu_sctb_x + procedure, pass(x) :: gthzbuf => s_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => s_gpu_sctb_buf + procedure, pass(x) :: new_buffer => s_gpu_new_buffer + procedure, nopass :: device_wait => s_gpu_device_wait + procedure, pass(x) :: free_buffer => s_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => s_gpu_maybe_free_buffer + procedure, pass(x) :: dot_v => s_gpu_dot_v + procedure, pass(x) :: dot_a => s_gpu_dot_a + procedure, pass(y) :: axpby_v => s_gpu_axpby_v + procedure, pass(y) :: axpby_a => s_gpu_axpby_a + procedure, pass(y) :: mlt_v => s_gpu_mlt_v + procedure, pass(y) :: mlt_a => s_gpu_mlt_a + procedure, pass(z) :: mlt_a_2 => s_gpu_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => s_gpu_mlt_v_2 + procedure, pass(x) :: scal => s_gpu_scal + procedure, pass(x) :: nrm2 => s_gpu_nrm2 + procedure, pass(x) :: amax => s_gpu_amax + procedure, pass(x) :: asum => s_gpu_asum + procedure, pass(x) :: absval1 => s_gpu_absval1 + procedure, pass(x) :: absval2 => s_gpu_absval2 + + final :: s_gpu_vect_finalize +#endif + end type psb_s_vect_gpu + + public :: psb_s_vect_gpu_ + private :: constructor + interface psb_s_vect_gpu_ + module procedure constructor + end interface psb_s_vect_gpu_ + +contains + + function constructor(x) result(this) + real(psb_spk_) :: x(:) + type(psb_s_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine s_gpu_device_wait() + call psb_cudaSync() + end subroutine s_gpu_device_wait + + subroutine s_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloat(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloat(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloat(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloat(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloat(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceFloat(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloat(x%dt_buf,y,n) + + end select + + end subroutine s_gpu_gthzv_x + + subroutine s_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_s_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readFloat(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceFloat(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readFloat(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine s_gpu_gthzbuf + + subroutine s_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_s_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine s_gpu_sctb + + subroutine s_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloat(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloat(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeFloat(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloat(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloat(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeFloat(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloat(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine s_gpu_sctb_x + + subroutine s_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta + class(psb_s_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeFloat(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeFloat(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceFloat(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine s_gpu_sctb_buf + + + subroutine s_gpu_bld_x(x,this) + use psb_base_mod + real(psb_spk_), intent(in) :: this(:) + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine s_gpu_bld_x + + subroutine s_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'s_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine s_gpu_bld_mn + + subroutine s_gpu_set_host(x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine s_gpu_set_host + + subroutine s_gpu_set_dev(x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine s_gpu_set_dev + + subroutine s_gpu_set_sync(x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine s_gpu_set_sync + + function s_gpu_is_dev(x) result(res) + implicit none + class(psb_s_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function s_gpu_is_dev + + function s_gpu_is_host(x) result(res) + implicit none + class(psb_s_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function s_gpu_is_host + + function s_gpu_is_sync(x) result(res) + implicit none + class(psb_s_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function s_gpu_is_sync + + + function s_gpu_get_nrows(x) result(res) + implicit none + class(psb_s_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function s_gpu_get_nrows + + function s_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'sGPU' + end function s_gpu_get_fmt + + subroutine s_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_s_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine s_gpu_all + + subroutine s_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=szero + call x%set_host() + end subroutine s_gpu_zero + + subroutine s_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_s_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'s_gpu_sync') + end if + + end subroutine s_gpu_sync + + subroutine s_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine s_gpu_free + + subroutine s_gpu_set_scal(x,val,first,last) + class(psb_s_vect_gpu), intent(inout) :: x + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine s_gpu_set_scal +!!$ +!!$ subroutine s_gpu_set_vect(x,val) +!!$ class(psb_s_vect_gpu), intent(inout) :: x +!!$ real(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_s_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine s_gpu_set_vect + + + + function s_gpu_dot_v(n,x,y) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: ddot + integer(psb_ipk_) :: info + + res = szero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_s_vect + ! + select type(yy => y) + type is (psb_s_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_s_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'s_gpu_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function s_gpu_dot_v + + function s_gpu_dot_a(n,x,y) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + real(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function s_gpu_dot_a + + subroutine s_gpu_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_gpu), intent(inout) :: y + real(psb_spk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_s_vect_gpu) + ! Do something different here + if ((beta /= szero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + type is (psb_s_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_s_vect_gpu) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,sone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine s_gpu_mlt_v + + subroutine s_gpu_mlt_a(x, y, info) + use psi_serial_mod + implicit none + real(psb_spk_), intent(in) :: x(:) + class(psb_s_vect_gpu), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_s_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine s_gpu_mlt_a + + subroutine s_gpu_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + real(psb_spk_), intent(in) :: alpha,beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(in) :: y(:) + class(psb_s_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine s_gpu_mlt_a_2 + + subroutine s_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + real(psb_spk_), intent(in) :: alpha,beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_s_vect_gpu) + select type (yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= szero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= szero).and.(z%is_dev())) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= szero).and.(z%is_dev())) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine s_gpu_mlt_v_2 + + subroutine s_gpu_scal(alpha, x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + real(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine s_gpu_scal + + + function s_gpu_nrm2(n,x) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDevice(res,n,x%deviceVect) + + end function s_gpu_nrm2 + + function s_gpu_amax(n,x) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDevice(res,n,x%deviceVect) + + end function s_gpu_amax + + function s_gpu_asum(n,x) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDevice(res,n,x%deviceVect) + + end function s_gpu_asum + + subroutine s_gpu_absval1(x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,sone,x%deviceVect) + + end subroutine s_gpu_absval1 + + subroutine s_gpu_absval2(x,y) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_s_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,sone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_s_base_vect_type%absval(y) + end select + end subroutine s_gpu_absval2 + + + subroutine s_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine s_gpu_vect_finalize + + subroutine s_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_s_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_s_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceFloat(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine s_gpu_ins_v + + subroutine s_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_s_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine s_gpu_ins_a + +#endif + +end module psb_s_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_s_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_s_multivect_mod + use psb_s_base_multivect_mod + + use psb_i_multivect_mod +#ifdef HAVE_SPGPU + use psb_i_gpu_multivect_mod + use psb_s_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_multivect_type) :: psb_s_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => s_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => s_gpu_multi_get_ncols + procedure, nopass :: get_fmt => s_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => s_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => s_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => s_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => s_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => s_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => s_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => s_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => s_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => s_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => s_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => s_gpu_multi_amax +!!$ procedure, pass(x) :: asum => s_gpu_multi_asum + procedure, pass(x) :: all => s_gpu_multi_all + procedure, pass(x) :: zero => s_gpu_multi_zero + procedure, pass(x) :: asb => s_gpu_multi_asb + procedure, pass(x) :: sync => s_gpu_multi_sync + procedure, pass(x) :: sync_space => s_gpu_multi_sync_space + procedure, pass(x) :: bld_x => s_gpu_multi_bld_x + procedure, pass(x) :: bld_n => s_gpu_multi_bld_n + procedure, pass(x) :: free => s_gpu_multi_free + procedure, pass(x) :: ins => s_gpu_multi_ins + procedure, pass(x) :: is_host => s_gpu_multi_is_host + procedure, pass(x) :: is_dev => s_gpu_multi_is_dev + procedure, pass(x) :: is_sync => s_gpu_multi_is_sync + procedure, pass(x) :: set_host => s_gpu_multi_set_host + procedure, pass(x) :: set_dev => s_gpu_multi_set_dev + procedure, pass(x) :: set_sync => s_gpu_multi_set_sync + procedure, pass(x) :: set_scal => s_gpu_multi_set_scal + procedure, pass(x) :: set_vect => s_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => s_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => s_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => s_gpu_multi_sctb_x + final :: s_gpu_multi_vect_finalize +#endif + end type psb_s_multivect_gpu + + public :: psb_s_multivect_gpu + private :: constructor + interface psb_s_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + real(psb_spk_) :: x(:,:) + type(psb_s_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine s_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_spk_) :: y(:) +!!$ class(psb_s_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine s_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine s_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ real(psb_spk_) :: beta, x(:) +!!$ class(psb_s_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_s_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine s_gpu_multi_sctb +!!$ +!!$ subroutine s_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_spk_) :: beta, x(:) +!!$ class(psb_s_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine s_gpu_multi_sctb_x + + + subroutine s_gpu_multi_bld_x(x,this) + use psb_base_mod + real(psb_spk_), intent(in) :: this(:,:) + class(psb_s_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine s_gpu_multi_bld_x + + subroutine s_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'s_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine s_gpu_multi_bld_n + + + subroutine s_gpu_multi_set_host(x) + implicit none + class(psb_s_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine s_gpu_multi_set_host + + subroutine s_gpu_multi_set_dev(x) + implicit none + class(psb_s_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine s_gpu_multi_set_dev + + subroutine s_gpu_multi_set_sync(x) + implicit none + class(psb_s_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine s_gpu_multi_set_sync + + function s_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function s_gpu_multi_is_dev + + function s_gpu_multi_is_host(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function s_gpu_multi_is_host + + function s_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function s_gpu_multi_is_sync + + + function s_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function s_gpu_multi_get_nrows + + function s_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function s_gpu_multi_get_ncols + + function s_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'sGPU' + end function s_gpu_multi_get_fmt + +!!$ function s_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ real(psb_spk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_s_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_s_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_s_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'s_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function s_gpu_multi_dot_v +!!$ +!!$ function s_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ real(psb_spk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ real(psb_spk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function s_gpu_multi_dot_a +!!$ +!!$ subroutine s_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_s_base_multivect_type), intent(inout) :: x +!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ real(psb_spk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_s_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_s_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_s_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_s_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine s_gpu_multi_mlt_v +!!$ +!!$ subroutine s_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: x(:) +!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_s_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine s_gpu_multi_mlt_a +!!$ +!!$ subroutine s_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: alpha,beta +!!$ real(psb_spk_), intent(in) :: x(:) +!!$ real(psb_spk_), intent(in) :: y(:) +!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine s_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine s_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: alpha,beta +!!$ class(psb_s_base_multivect_type), intent(inout) :: x +!!$ class(psb_s_base_multivect_type), intent(inout) :: y +!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_s_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_s_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine s_gpu_multi_mlt_v_2 + + + subroutine s_gpu_multi_set_scal(x,val) + class(psb_s_multivect_gpu), intent(inout) :: x + real(psb_spk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_s_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine s_gpu_multi_set_scal + + subroutine s_gpu_multi_set_vect(x,val) + class(psb_s_multivect_gpu), intent(inout) :: x + real(psb_spk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_s_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine s_gpu_multi_set_vect + + + +!!$ subroutine s_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ real(psb_spk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_s_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine s_gpu_multi_scal +!!$ +!!$ +!!$ function s_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function s_gpu_multi_nrm2 +!!$ +!!$ function s_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function s_gpu_multi_amax +!!$ +!!$ function s_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function s_gpu_multi_asum + + subroutine s_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=szero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine s_gpu_multi_all + + subroutine s_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_s_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine s_gpu_multi_zero + + subroutine s_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_s_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc s_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => s_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_s_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_s_hdiag_csmm + procedure, pass(a) :: csmv => psb_s_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_s_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_s_hdiag_scals + ! procedure, pass(a) :: scalv => psb_s_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_s_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_s_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_s_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_hdiag_from_fmt + procedure, pass(a) :: free => s_hdiag_free + procedure, pass(a) :: mold => psb_s_hdiag_mold + procedure, pass(a) :: to_gpu => psb_s_hdiag_to_gpu + final :: s_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_hdiag_mold +#endif + end type psb_s_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_hdiag_get_nzeros, s_hdiag_free, s_hdiag_get_fmt, & + & s_hdiag_get_size, s_hdiag_sizeof, s_hdiag_get_nz_row + + + interface + subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_s_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_s_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_s_hdiag_mold(a,b,info) + import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hdiag_mold + end interface + + interface + subroutine psb_s_hdiag_to_gpu(a,info) + import :: psb_s_hdiag_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hdiag_to_gpu + end interface + + interface + subroutine psb_s_cp_hdiag_from_coo(a,b,info) + import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_s_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_s_mv_hdiag_from_coo(a,b,info) + import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_s_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_s_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdiag_scal(d,a,info, side) +!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_s_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdiag_scals(d,a,info) +!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function s_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function s_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_hdia_sparse_mat%free() + + return + + end subroutine s_hdiag_free + + subroutine s_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_s_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_hdia_sparse_mat%free() + + return + end subroutine s_hdiag_finalize + +#else + + interface + subroutine psb_s_hdiag_mold(a,b,info) + import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hdiag_mold + end interface + +#endif + +end module psb_s_hdiag_mat_mod diff --git a/gpu/psb_s_hlg_mat_mod.F90 b/gpu/psb_s_hlg_mat_mod.F90 new file mode 100644 index 00000000..8f896e4b --- /dev/null +++ b/gpu/psb_s_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_hlg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_hll_sparse_mat) :: psb_s_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_hlg_get_fmt + procedure, pass(a) :: sizeof => s_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_s_hlg_vect_mv + procedure, pass(a) :: csmm => psb_s_hlg_csmm + procedure, pass(a) :: csmv => psb_s_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_hlg_scals + procedure, pass(a) :: scalv => psb_s_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_s_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_hlg_from_fmt + procedure, pass(a) :: free => s_hlg_free + procedure, pass(a) :: mold => psb_s_hlg_mold + procedure, pass(a) :: is_host => s_hlg_is_host + procedure, pass(a) :: is_dev => s_hlg_is_dev + procedure, pass(a) :: is_sync => s_hlg_is_sync + procedure, pass(a) :: set_host => s_hlg_set_host + procedure, pass(a) :: set_dev => s_hlg_set_dev + procedure, pass(a) :: set_sync => s_hlg_set_sync + procedure, pass(a) :: sync => s_hlg_sync + procedure, pass(a) :: from_gpu => psb_s_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_hlg_to_gpu + final :: s_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_hlg_mold +#endif + end type psb_s_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_hlg_get_nzeros, s_hlg_free, s_hlg_get_fmt, & + & s_hlg_get_size, s_hlg_sizeof, s_hlg_get_nz_row + + + interface + subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hlg_vect_mv + end interface + + interface + subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hlg_inner_vect_sv + end interface + + interface + subroutine psb_s_hlg_reallocate_nz(nz,a) + import :: psb_s_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_s_hlg_reallocate_nz + end interface + + interface + subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_s_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hlg_allocate_mnnz + end interface + + interface + subroutine psb_s_hlg_mold(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hlg_mold + end interface + + interface + subroutine psb_s_hlg_from_gpu(a,info) + import :: psb_s_hlg_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hlg_from_gpu + end interface + + interface + subroutine psb_s_hlg_to_gpu(a,info, nzrm) + import :: psb_s_hlg_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_hlg_to_gpu + end interface + + interface + subroutine psb_s_cp_hlg_from_coo(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hlg_from_coo + end interface + + interface + subroutine psb_s_cp_hlg_from_fmt(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hlg_from_fmt + end interface + + interface + subroutine psb_s_mv_hlg_from_coo(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hlg_from_coo + end interface + + + interface + subroutine psb_s_mv_hlg_from_fmt(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hlg_from_fmt + end interface + + interface + subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hlg_csmv + end interface + interface + subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hlg_csmm + end interface + + interface + subroutine psb_s_hlg_scal(d,a,info, side) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_hlg_scal + end interface + + interface + subroutine psb_s_hlg_scals(d,a,info) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hlg_sizeof(a) result(res) + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_hlg_sizeof + + function s_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function s_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_hll_sparse_mat%free() + + return + + end subroutine s_hlg_free + + + subroutine s_hlg_sync(a) + implicit none + class(psb_s_hlg_sparse_mat), target, intent(in) :: a + class(psb_s_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_hlg_sync + + subroutine s_hlg_set_host(a) + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_hlg_set_host + + subroutine s_hlg_set_dev(a) + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_hlg_set_dev + + subroutine s_hlg_set_sync(a) + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_hlg_set_sync + + function s_hlg_is_dev(a) result(res) + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_hlg_is_dev + + function s_hlg_is_host(a) result(res) + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_hlg_is_host + + function s_hlg_is_sync(a) result(res) + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_hlg_is_sync + + + subroutine s_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_s_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine s_hlg_finalize + +#else + + interface + subroutine psb_s_hlg_mold(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hlg_mold + end interface + +#endif + +end module psb_s_hlg_mat_mod diff --git a/gpu/psb_s_hybg_mat_mod.F90 b/gpu/psb_s_hybg_mat_mod.F90 new file mode 100644 index 00000000..5a8e0e5d --- /dev/null +++ b/gpu/psb_s_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_s_hybg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use cusparse_mod + + type, extends(psb_s_csr_sparse_mat) :: psb_s_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(s_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => s_hybg_get_fmt + procedure, pass(a) :: sizeof => s_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_s_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_hybg_csmm + procedure, pass(a) :: csmv => psb_s_hybg_csmv + procedure, pass(a) :: scals => psb_s_hybg_scals + procedure, pass(a) :: scalv => psb_s_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_s_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_hybg_from_fmt + procedure, pass(a) :: free => s_hybg_free + procedure, pass(a) :: mold => psb_s_hybg_mold + procedure, pass(a) :: to_gpu => psb_s_hybg_to_gpu + final :: s_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_hybg_mold +#endif + end type psb_s_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_hybg_get_nzeros, s_hybg_free, s_hybg_get_fmt, & + & s_hybg_get_size, s_hybg_sizeof, s_hybg_get_nz_row + + + interface + subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hybg_inner_vect_sv + end interface + + interface + subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hybg_vect_mv + end interface + + interface + subroutine psb_s_hybg_reallocate_nz(nz,a) + import :: psb_s_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_s_hybg_reallocate_nz + end interface + + interface + subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_s_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hybg_allocate_mnnz + end interface + + interface + subroutine psb_s_hybg_mold(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hybg_mold + end interface + + interface + subroutine psb_s_hybg_to_gpu(a,info, nzrm) + import :: psb_s_hybg_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_hybg_to_gpu + end interface + + interface + subroutine psb_s_cp_hybg_from_coo(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hybg_from_coo + end interface + + interface + subroutine psb_s_cp_hybg_from_fmt(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hybg_from_fmt + end interface + + interface + subroutine psb_s_mv_hybg_from_coo(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hybg_from_coo + end interface + + interface + subroutine psb_s_mv_hybg_from_fmt(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hybg_from_fmt + end interface + + interface + subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hybg_csmv + end interface + interface + subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hybg_csmm + end interface + + interface + subroutine psb_s_hybg_scal(d,a,info,side) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_hybg_scal + end interface + + interface + subroutine psb_s_hybg_scals(d,a,info) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hybg_sizeof(a) result(res) + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_hybg_sizeof + + function s_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function s_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_hybg_free + + subroutine s_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_s_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine s_hybg_finalize + +#else + + interface + subroutine psb_s_hybg_mold(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hybg_mold + end interface + +#endif + +end module psb_s_hybg_mat_mod +#endif diff --git a/gpu/psb_s_vectordev_mod.F90 b/gpu/psb_s_vectordev_mod.F90 new file mode 100644 index 00000000..a7319b95 --- /dev/null +++ b/gpu/psb_s_vectordev_mod.F90 @@ -0,0 +1,390 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedFloat(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + real(c_float), value :: dummy + end function registerMappedFloat + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceFloat(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + real(c_float) :: hostVec(*) + end function writeMultiVecDeviceFloat + function writeMultiVecDeviceFloatR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceFloatR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + real(c_float) :: hostVec(ld,*) + end function writeMultiVecDeviceFloatR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceFloat(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + real(c_float) :: hostVec(*) + end function readMultiVecDeviceFloat + function readMultiVecDeviceFloatR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceFloatR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + real(c_float) :: hostVec(ld,*) + end function readMultiVecDeviceFloatR2 + end interface + + interface allocateFloat + function allocateFloat(didx,n) & + & result(res) bind(c,name='allocateFloat') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateFloat + function allocateMultiFloat(didx,m,n) & + & result(res) bind(c,name='allocateMultiFloat') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiFloat + end interface + + interface writeFloat + function writeFloat(didx,hidx,n) & + & result(res) bind(c,name='writeFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(*) + integer(c_int),value :: n + end function writeFloat + function writeFloatFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeFloatFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeFloatFirst + function writeMultiFloat(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiFloat + end interface + + interface readFloat + function readFloat(didx,hidx,n) & + & result(res) bind(c,name='readFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(*) + integer(c_int),value :: n + end function readFloat + function readFloatFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readFloatFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readFloatFirst + function readMultiFloat(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiFloat + end interface + + interface + subroutine freeFloat(didx) & + & bind(c,name='freeFloat') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeFloat + end interface + + + interface setScalDevice + function setScalMultiVecDeviceFloat(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + real(c_float), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceFloat + end interface + + interface + function geinsMultiVecDeviceFloat(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceFloat + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceFloat(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceFloat + end interface + + interface + function igathMultiVecDeviceFloatVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceFloatVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceFloatVecIdx + end interface + + interface + function iscatMultiVecDeviceFloat(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + real(c_float),value :: beta + end function iscatMultiVecDeviceFloat + end interface + + interface + function iscatMultiVecDeviceFloatVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceFloatVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + real(c_float),value :: beta + end function iscatMultiVecDeviceFloatVecIdx + end interface + + + interface scalMultiVecDevice + function scalMultiVecDeviceFloat(alpha,deviceVecA) & + & result(val) bind(c,name='scalMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + real(c_float), value :: alpha + type(c_ptr), value :: deviceVecA + end function scalMultiVecDeviceFloat + end interface + + interface dotMultiVecDevice + function dotMultiVecDeviceFloat(res, n,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotMultiVecDeviceFloat + end interface + + interface nrm2MultiVecDevice + function nrm2MultiVecDeviceFloat(res,n,deviceVecA) & + & result(val) bind(c,name='nrm2MultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function nrm2MultiVecDeviceFloat + end interface + + interface amaxMultiVecDevice + function amaxMultiVecDeviceFloat(res,n,deviceVecA) & + & result(val) bind(c,name='amaxMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function amaxMultiVecDeviceFloat + end interface + + interface asumMultiVecDevice + function asumMultiVecDeviceFloat(res,n,deviceVecA) & + & result(val) bind(c,name='asumMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function asumMultiVecDeviceFloat + end interface + + + interface axpbyMultiVecDevice + function axpbyMultiVecDeviceFloat(n,alpha,deviceVecA,beta,deviceVecB) & + & result(res) bind(c,name='axpbyMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyMultiVecDeviceFloat + end interface + + interface axyMultiVecDevice + function axyMultiVecDeviceFloat(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='axyMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function axyMultiVecDeviceFloat + end interface + + interface axybzMultiVecDevice + function axybzMultiVecDeviceFloat(n,alpha,deviceVecA,deviceVecB,beta,deviceVecZ) & + & result(res) bind(c,name='axybzMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB,deviceVecZ + end function axybzMultiVecDeviceFloat + end interface + + + interface absMultiVecDevice + function absMultiVecDeviceFloat(n,alpha,deviceVecA) & + & result(res) bind(c,name='absMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha + type(c_ptr), value :: deviceVecA + end function absMultiVecDeviceFloat + function absMultiVecDeviceFloat2(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='absMultiVecDeviceFloat2') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function absMultiVecDeviceFloat2 + end interface + + interface inner_register + module procedure inner_registerFloat + end interface + + interface inner_unregister + module procedure inner_unregisterFloat + end interface + +contains + + + function inner_registerFloat(buffer,dval) result(res) + real(c_float), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + real(c_float) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerFloat + + subroutine inner_unregisterFloat(buffer) + real(c_float), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterFloat + +#endif + +end module psb_s_vectordev_mod diff --git a/gpu/psb_vectordev_mod.f90 b/gpu/psb_vectordev_mod.f90 new file mode 100644 index 00000000..1316d458 --- /dev/null +++ b/gpu/psb_vectordev_mod.f90 @@ -0,0 +1,8 @@ +module psb_vectordev_mod + use psb_base_vectordev_mod + use psb_s_vectordev_mod + use psb_d_vectordev_mod + use psb_c_vectordev_mod + use psb_z_vectordev_mod + use psb_i_vectordev_mod +end module psb_vectordev_mod diff --git a/gpu/psb_z_csrg_mat_mod.F90 b/gpu/psb_z_csrg_mat_mod.F90 new file mode 100644 index 00000000..14df1124 --- /dev/null +++ b/gpu/psb_z_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_csrg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_csr_sparse_mat) :: psb_z_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(z_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_csrg_get_fmt + procedure, pass(a) :: sizeof => z_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_z_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_csrg_csmm + procedure, pass(a) :: csmv => psb_z_csrg_csmv + procedure, pass(a) :: scals => psb_z_csrg_scals + procedure, pass(a) :: scalv => psb_z_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_z_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_csrg_from_fmt + procedure, pass(a) :: free => z_csrg_free + procedure, pass(a) :: mold => psb_z_csrg_mold + procedure, pass(a) :: is_host => z_csrg_is_host + procedure, pass(a) :: is_dev => z_csrg_is_dev + procedure, pass(a) :: is_sync => z_csrg_is_sync + procedure, pass(a) :: set_host => z_csrg_set_host + procedure, pass(a) :: set_dev => z_csrg_set_dev + procedure, pass(a) :: set_sync => z_csrg_set_sync + procedure, pass(a) :: sync => z_csrg_sync + procedure, pass(a) :: to_gpu => psb_z_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_z_csrg_from_gpu + final :: z_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_csrg_mold +#endif + end type psb_z_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_csrg_get_nzeros, z_csrg_free, z_csrg_get_fmt, & + & z_csrg_get_size, z_csrg_sizeof, z_csrg_get_nz_row + + + interface + subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrg_vect_mv + end interface + + interface + subroutine psb_z_csrg_reallocate_nz(nz,a) + import :: psb_z_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_z_csrg_reallocate_nz + end interface + + interface + subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_z_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_csrg_allocate_mnnz + end interface + + interface + subroutine psb_z_csrg_mold(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrg_mold + end interface + + interface + subroutine psb_z_csrg_to_gpu(a,info, nzrm) + import :: psb_z_csrg_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_csrg_to_gpu + end interface + + interface + subroutine psb_z_csrg_from_gpu(a,info) + import :: psb_z_csrg_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrg_from_gpu + end interface + + interface + subroutine psb_z_cp_csrg_from_coo(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_csrg_from_coo + end interface + + interface + subroutine psb_z_cp_csrg_from_fmt(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_csrg_from_fmt + end interface + + interface + subroutine psb_z_mv_csrg_from_coo(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_csrg_from_coo + end interface + + interface + subroutine psb_z_mv_csrg_from_fmt(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_csrg_from_fmt + end interface + + interface + subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrg_csmv + end interface + interface + subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrg_csmm + end interface + + interface + subroutine psb_z_csrg_scal(d,a,info,side) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_csrg_scal + end interface + + interface + subroutine psb_z_csrg_scals(d,a,info) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_csrg_sizeof(a) result(res) + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_csrg_sizeof + + function z_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function z_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine z_csrg_set_host(a) + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_csrg_set_host + + subroutine z_csrg_set_dev(a) + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_csrg_set_dev + + subroutine z_csrg_set_sync(a) + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_csrg_set_sync + + function z_csrg_is_dev(a) result(res) + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_csrg_is_dev + + function z_csrg_is_host(a) result(res) + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_csrg_is_host + + function z_csrg_is_sync(a) result(res) + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_csrg_is_sync + + + subroutine z_csrg_sync(a) + implicit none + class(psb_z_csrg_sparse_mat), target, intent(in) :: a + class(psb_z_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_csrg_sync + + subroutine z_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_csrg_free + + subroutine z_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_z_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine z_csrg_finalize + +#else + interface + subroutine psb_z_csrg_mold(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrg_mold + end interface + +#endif + +end module psb_z_csrg_mat_mod diff --git a/gpu/psb_z_diag_mat_mod.F90 b/gpu/psb_z_diag_mat_mod.F90 new file mode 100644 index 00000000..986d75d9 --- /dev/null +++ b/gpu/psb_z_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_z_dia_mat_mod + + type, extends(psb_z_dia_sparse_mat) :: psb_z_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => z_diag_get_fmt + procedure, pass(a) :: sizeof => z_diag_sizeof + procedure, pass(a) :: vect_mv => psb_z_diag_vect_mv +! procedure, pass(a) :: csmm => psb_z_diag_csmm + procedure, pass(a) :: csmv => psb_z_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_z_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_z_diag_scals +! procedure, pass(a) :: scalv => psb_z_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_z_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_z_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_z_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_z_mv_diag_from_fmt + procedure, pass(a) :: free => z_diag_free + procedure, pass(a) :: mold => psb_z_diag_mold + procedure, pass(a) :: to_gpu => psb_z_diag_to_gpu + final :: z_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_diag_mold +#endif + end type psb_z_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_diag_get_nzeros, z_diag_free, z_diag_get_fmt, & + & z_diag_get_size, z_diag_sizeof, z_diag_get_nz_row + + + interface + subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_diag_vect_mv + end interface + + interface + subroutine psb_z_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_diag_inner_vect_sv + end interface + + interface + subroutine psb_z_diag_reallocate_nz(nz,a) + import :: psb_z_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_diag_sparse_mat), intent(inout) :: a + end subroutine psb_z_diag_reallocate_nz + end interface + + interface + subroutine psb_z_diag_allocate_mnnz(m,n,a,nz) + import :: psb_z_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_diag_allocate_mnnz + end interface + + interface + subroutine psb_z_diag_mold(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_diag_mold + end interface + + interface + subroutine psb_z_diag_to_gpu(a,info, nzrm) + import :: psb_z_diag_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_diag_to_gpu + end interface + + interface + subroutine psb_z_cp_diag_from_coo(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_diag_from_coo + end interface + + interface + subroutine psb_z_cp_diag_from_fmt(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_diag_from_fmt + end interface + + interface + subroutine psb_z_mv_diag_from_coo(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_diag_from_coo + end interface + + + interface + subroutine psb_z_mv_diag_from_fmt(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_diag_from_fmt + end interface + + interface + subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_diag_csmv + end interface + interface + subroutine psb_z_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_diag_csmm + end interface + + interface + subroutine psb_z_diag_scal(d,a,info, side) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_diag_scal + end interface + + interface + subroutine psb_z_diag_scals(d,a,info) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_diag_sizeof(a) result(res) + implicit none + class(psb_z_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_diag_sizeof + + function z_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function z_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_dia_sparse_mat%free() + + return + + end subroutine z_diag_free + + subroutine z_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_z_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_diag_finalize + +#else + + interface + subroutine psb_z_diag_mold(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_diag_mold + end interface + +#endif + +end module psb_z_diag_mat_mod diff --git a/gpu/psb_z_dnsg_mat_mod.F90 b/gpu/psb_z_dnsg_mat_mod.F90 new file mode 100644 index 00000000..6a3d4369 --- /dev/null +++ b/gpu/psb_z_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_dnsg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_dns_mat_mod + use dnsdev_mod + + type, extends(psb_z_dns_sparse_mat) :: psb_z_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => z_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => z_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_z_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_z_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_z_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_z_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_z_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_z_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_z_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_z_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_z_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_dnsg_from_fmt + procedure, pass(a) :: free => z_dnsg_free + procedure, pass(a) :: mold => psb_z_dnsg_mold + procedure, pass(a) :: to_gpu => psb_z_dnsg_to_gpu + final :: z_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_dnsg_mold +#endif + end type psb_z_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_dnsg_get_nzeros, z_dnsg_free, z_dnsg_get_fmt, & + & z_dnsg_get_size, z_dnsg_get_nz_row + + + interface + subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_z_dnsg_mold(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dnsg_mold + end interface + + interface + subroutine psb_z_dnsg_to_gpu(a,info) + import :: psb_z_dnsg_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dnsg_to_gpu + end interface + + interface + subroutine psb_z_cp_dnsg_from_coo(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dnsg_from_coo + end interface + + interface + subroutine psb_z_cp_dnsg_from_fmt(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_z_mv_dnsg_from_coo(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_z_mv_dnsg_from_fmt(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_z_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_z_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_scal(d,a,info, side) +!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_scals(d,a,info) +!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function z_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function z_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_dns_sparse_mat%free() + + return + + end subroutine z_dnsg_free + + subroutine z_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_z_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_dnsg_finalize + +#else + + interface + subroutine psb_z_dnsg_mold(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dnsg_mold + end interface + +#endif + +end module psb_z_dnsg_mat_mod diff --git a/gpu/psb_z_elg_mat_mod.F90 b/gpu/psb_z_elg_mat_mod.F90 new file mode 100644 index 00000000..cf9e479c --- /dev/null +++ b/gpu/psb_z_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_elg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_ell_sparse_mat) :: psb_z_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_elg_get_fmt + procedure, pass(a) :: sizeof => z_elg_sizeof + procedure, pass(a) :: vect_mv => psb_z_elg_vect_mv + procedure, pass(a) :: csmm => psb_z_elg_csmm + procedure, pass(a) :: csmv => psb_z_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_elg_scals + procedure, pass(a) :: scalv => psb_z_elg_scal + procedure, pass(a) :: reallocate_nz => psb_z_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_elg_allocate_mnnz + procedure, pass(a) :: reinit => z_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_elg_from_fmt + procedure, pass(a) :: free => z_elg_free + procedure, pass(a) :: mold => psb_z_elg_mold + procedure, pass(a) :: csput_a => psb_z_elg_csput_a + procedure, pass(a) :: csput_v => psb_z_elg_csput_v + procedure, pass(a) :: is_host => z_elg_is_host + procedure, pass(a) :: is_dev => z_elg_is_dev + procedure, pass(a) :: is_sync => z_elg_is_sync + procedure, pass(a) :: set_host => z_elg_set_host + procedure, pass(a) :: set_dev => z_elg_set_dev + procedure, pass(a) :: set_sync => z_elg_set_sync + procedure, pass(a) :: sync => z_elg_sync + procedure, pass(a) :: from_gpu => psb_z_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_elg_to_gpu + procedure, pass(a) :: asb => psb_z_elg_asb + final :: z_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_elg_mold + procedure, pass(a) :: asb => psb_z_elg_asb +#endif + end type psb_z_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_elg_get_nzeros, z_elg_free, z_elg_get_fmt, & + & z_elg_get_size, z_elg_sizeof, z_elg_get_nz_row, z_elg_sync + + + interface + subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_elg_vect_mv + end interface + + interface + subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_elg_inner_vect_sv + end interface + + interface + subroutine psb_z_elg_reallocate_nz(nz,a) + import :: psb_z_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_elg_reallocate_nz + end interface + + interface + subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) + import :: psb_z_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_elg_allocate_mnnz + end interface + + interface + subroutine psb_z_elg_mold(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_mold + end interface + + interface + subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_csput_a + end interface + + interface + subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,& + & psb_i_base_vect_type + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_csput_v + end interface + + interface + subroutine psb_z_elg_from_gpu(a,info) + import :: psb_z_elg_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_from_gpu + end interface + + interface + subroutine psb_z_elg_to_gpu(a,info, nzrm) + import :: psb_z_elg_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_elg_to_gpu + end interface + + interface + subroutine psb_z_cp_elg_from_coo(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_elg_from_coo + end interface + + interface + subroutine psb_z_cp_elg_from_fmt(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_elg_from_fmt + end interface + + interface + subroutine psb_z_mv_elg_from_coo(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_elg_from_coo + end interface + + + interface + subroutine psb_z_mv_elg_from_fmt(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_elg_from_fmt + end interface + + interface + subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_elg_csmv + end interface + interface + subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_elg_csmm + end interface + + interface + subroutine psb_z_elg_scal(d,a,info, side) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_elg_scal + end interface + + interface + subroutine psb_z_elg_scals(d,a,info) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_scals + end interface + + interface + subroutine psb_z_elg_asb(a) + import :: psb_z_elg_sparse_mat + class(psb_z_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_elg_sizeof(a) result(res) + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_elg_sizeof + + function z_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function z_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine z_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = zzero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine z_elg_reinit + + subroutine z_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine z_elg_free + + subroutine z_elg_sync(a) + implicit none + class(psb_z_elg_sparse_mat), target, intent(in) :: a + class(psb_z_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_elg_sync + + subroutine z_elg_set_host(a) + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_elg_set_host + + subroutine z_elg_set_dev(a) + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_elg_set_dev + + subroutine z_elg_set_sync(a) + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_elg_set_sync + + function z_elg_is_dev(a) result(res) + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_elg_is_dev + + function z_elg_is_host(a) result(res) + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_elg_is_host + + function z_elg_is_sync(a) result(res) + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_elg_is_sync + + subroutine z_elg_finalize(a) + use elldev_mod + implicit none + type(psb_z_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine z_elg_finalize + +#else + + interface + subroutine psb_z_elg_asb(a) + import :: psb_z_elg_sparse_mat + class(psb_z_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_elg_asb + end interface + + interface + subroutine psb_z_elg_mold(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_mold + end interface + +#endif + +end module psb_z_elg_mat_mod diff --git a/gpu/psb_z_gpu_vect_mod.F90 b/gpu/psb_z_gpu_vect_mod.F90 new file mode 100644 index 00000000..ca5ac922 --- /dev/null +++ b/gpu/psb_z_gpu_vect_mod.F90 @@ -0,0 +1,1989 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_z_vect_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_gpu_vect_mod + use psb_i_vectordev_mod + use psb_z_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_vect_type) :: psb_z_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + complex(c_double_complex), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + complex(c_double_complex), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => z_gpu_get_nrows + procedure, nopass :: get_fmt => z_gpu_get_fmt + + procedure, pass(x) :: all => z_gpu_all + procedure, pass(x) :: zero => z_gpu_zero + procedure, pass(x) :: asb_m => z_gpu_asb_m + procedure, pass(x) :: sync => z_gpu_sync + procedure, pass(x) :: sync_space => z_gpu_sync_space + procedure, pass(x) :: bld_x => z_gpu_bld_x + procedure, pass(x) :: bld_mn => z_gpu_bld_mn + procedure, pass(x) :: free => z_gpu_free + procedure, pass(x) :: ins_a => z_gpu_ins_a + procedure, pass(x) :: ins_v => z_gpu_ins_v + procedure, pass(x) :: is_host => z_gpu_is_host + procedure, pass(x) :: is_dev => z_gpu_is_dev + procedure, pass(x) :: is_sync => z_gpu_is_sync + procedure, pass(x) :: set_host => z_gpu_set_host + procedure, pass(x) :: set_dev => z_gpu_set_dev + procedure, pass(x) :: set_sync => z_gpu_set_sync + procedure, pass(x) :: set_scal => z_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => z_gpu_set_vect + procedure, pass(x) :: gthzv_x => z_gpu_gthzv_x + procedure, pass(y) :: sctb => z_gpu_sctb + procedure, pass(y) :: sctb_x => z_gpu_sctb_x + procedure, pass(x) :: gthzbuf => z_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => z_gpu_sctb_buf + procedure, pass(x) :: new_buffer => z_gpu_new_buffer + procedure, nopass :: device_wait => z_gpu_device_wait + procedure, pass(x) :: free_buffer => z_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => z_gpu_maybe_free_buffer + procedure, pass(x) :: dot_v => z_gpu_dot_v + procedure, pass(x) :: dot_a => z_gpu_dot_a + procedure, pass(y) :: axpby_v => z_gpu_axpby_v + procedure, pass(y) :: axpby_a => z_gpu_axpby_a + procedure, pass(y) :: mlt_v => z_gpu_mlt_v + procedure, pass(y) :: mlt_a => z_gpu_mlt_a + procedure, pass(z) :: mlt_a_2 => z_gpu_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => z_gpu_mlt_v_2 + procedure, pass(x) :: scal => z_gpu_scal + procedure, pass(x) :: nrm2 => z_gpu_nrm2 + procedure, pass(x) :: amax => z_gpu_amax + procedure, pass(x) :: asum => z_gpu_asum + procedure, pass(x) :: absval1 => z_gpu_absval1 + procedure, pass(x) :: absval2 => z_gpu_absval2 + + final :: z_gpu_vect_finalize +#endif + end type psb_z_vect_gpu + + public :: psb_z_vect_gpu_ + private :: constructor + interface psb_z_vect_gpu_ + module procedure constructor + end interface psb_z_vect_gpu_ + +contains + + function constructor(x) result(this) + complex(psb_dpk_) :: x(:) + type(psb_z_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine z_gpu_device_wait() + call psb_cudaSync() + end subroutine z_gpu_device_wait + + subroutine z_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDoubleComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDoubleComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplex(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(x%dt_buf,y,n) + + end select + + end subroutine z_gpu_gthzv_x + + subroutine z_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_z_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplex(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readDoubleComplex(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine z_gpu_gthzbuf + + subroutine z_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_z_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine z_gpu_sctb + + subroutine z_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDoubleComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeDoubleComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDoubleComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeDoubleComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleComplex(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine z_gpu_sctb_x + + subroutine z_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta + class(psb_z_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeDoubleComplex(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeDoubleComplex(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceDoubleComplex(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine z_gpu_sctb_buf + + + subroutine z_gpu_bld_x(x,this) + use psb_base_mod + complex(psb_dpk_), intent(in) :: this(:) + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine z_gpu_bld_x + + subroutine z_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'z_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine z_gpu_bld_mn + + subroutine z_gpu_set_host(x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine z_gpu_set_host + + subroutine z_gpu_set_dev(x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine z_gpu_set_dev + + subroutine z_gpu_set_sync(x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine z_gpu_set_sync + + function z_gpu_is_dev(x) result(res) + implicit none + class(psb_z_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_gpu_is_dev + + function z_gpu_is_host(x) result(res) + implicit none + class(psb_z_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_gpu_is_host + + function z_gpu_is_sync(x) result(res) + implicit none + class(psb_z_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_gpu_is_sync + + + function z_gpu_get_nrows(x) result(res) + implicit none + class(psb_z_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function z_gpu_get_nrows + + function z_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'zGPU' + end function z_gpu_get_fmt + + subroutine z_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_z_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine z_gpu_all + + subroutine z_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=zzero + call x%set_host() + end subroutine z_gpu_zero + + subroutine z_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_z_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'z_gpu_sync') + end if + + end subroutine z_gpu_sync + + subroutine z_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine z_gpu_free + + subroutine z_gpu_set_scal(x,val,first,last) + class(psb_z_vect_gpu), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine z_gpu_set_scal +!!$ +!!$ subroutine z_gpu_set_vect(x,val) +!!$ class(psb_z_vect_gpu), intent(inout) :: x +!!$ complex(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_z_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine z_gpu_set_vect + + + + function z_gpu_dot_v(n,x,y) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + complex(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + + res = zzero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_z_vect + ! + select type(yy => y) + type is (psb_z_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_z_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'z_gpu_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function z_gpu_dot_v + + function z_gpu_dot_a(n,x,y) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + complex(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + complex(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function z_gpu_dot_a + + subroutine z_gpu_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_gpu), intent(inout) :: y + complex(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_z_vect_gpu) + ! Do something different here + if ((beta /= zzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + type is (psb_z_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_z_vect_gpu) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,zone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine z_gpu_mlt_v + + subroutine z_gpu_mlt_a(x, y, info) + use psi_serial_mod + implicit none + complex(psb_dpk_), intent(in) :: x(:) + class(psb_z_vect_gpu), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_z_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine z_gpu_mlt_a + + subroutine z_gpu_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha,beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(in) :: y(:) + class(psb_z_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine z_gpu_mlt_a_2 + + subroutine z_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha,beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_z_vect_gpu) + select type (yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= zzero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= zzero).and.(z%is_dev())) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= zzero).and.(z%is_dev())) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine z_gpu_mlt_v_2 + + subroutine z_gpu_scal(alpha, x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + complex(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine z_gpu_scal + + + function z_gpu_nrm2(n,x) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_gpu_nrm2 + + function z_gpu_amax(n,x) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_gpu_amax + + function z_gpu_asum(n,x) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_gpu_asum + + subroutine z_gpu_absval1(x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,zone,x%deviceVect) + + end subroutine z_gpu_absval1 + + subroutine z_gpu_absval2(x,y) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_z_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,zone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_z_base_vect_type%absval(y) + end select + end subroutine z_gpu_absval2 + + + subroutine z_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine z_gpu_vect_finalize + + subroutine z_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_z_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_z_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceDoubleComplex(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine z_gpu_ins_v + + subroutine z_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_z_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine z_gpu_ins_a + +#endif + +end module psb_z_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_z_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_z_multivect_mod + use psb_z_base_multivect_mod + + use psb_i_multivect_mod +#ifdef HAVE_SPGPU + use psb_i_gpu_multivect_mod + use psb_z_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_multivect_type) :: psb_z_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => z_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => z_gpu_multi_get_ncols + procedure, nopass :: get_fmt => z_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => z_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => z_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => z_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => z_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => z_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => z_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => z_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => z_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => z_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => z_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => z_gpu_multi_amax +!!$ procedure, pass(x) :: asum => z_gpu_multi_asum + procedure, pass(x) :: all => z_gpu_multi_all + procedure, pass(x) :: zero => z_gpu_multi_zero + procedure, pass(x) :: asb => z_gpu_multi_asb + procedure, pass(x) :: sync => z_gpu_multi_sync + procedure, pass(x) :: sync_space => z_gpu_multi_sync_space + procedure, pass(x) :: bld_x => z_gpu_multi_bld_x + procedure, pass(x) :: bld_n => z_gpu_multi_bld_n + procedure, pass(x) :: free => z_gpu_multi_free + procedure, pass(x) :: ins => z_gpu_multi_ins + procedure, pass(x) :: is_host => z_gpu_multi_is_host + procedure, pass(x) :: is_dev => z_gpu_multi_is_dev + procedure, pass(x) :: is_sync => z_gpu_multi_is_sync + procedure, pass(x) :: set_host => z_gpu_multi_set_host + procedure, pass(x) :: set_dev => z_gpu_multi_set_dev + procedure, pass(x) :: set_sync => z_gpu_multi_set_sync + procedure, pass(x) :: set_scal => z_gpu_multi_set_scal + procedure, pass(x) :: set_vect => z_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => z_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => z_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => z_gpu_multi_sctb_x + final :: z_gpu_multi_vect_finalize +#endif + end type psb_z_multivect_gpu + + public :: psb_z_multivect_gpu + private :: constructor + interface psb_z_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + complex(psb_dpk_) :: x(:,:) + type(psb_z_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine z_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_dpk_) :: y(:) +!!$ class(psb_z_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine z_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine z_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ complex(psb_dpk_) :: beta, x(:) +!!$ class(psb_z_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_z_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine z_gpu_multi_sctb +!!$ +!!$ subroutine z_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_dpk_) :: beta, x(:) +!!$ class(psb_z_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine z_gpu_multi_sctb_x + + + subroutine z_gpu_multi_bld_x(x,this) + use psb_base_mod + complex(psb_dpk_), intent(in) :: this(:,:) + class(psb_z_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine z_gpu_multi_bld_x + + subroutine z_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'z_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine z_gpu_multi_bld_n + + + subroutine z_gpu_multi_set_host(x) + implicit none + class(psb_z_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine z_gpu_multi_set_host + + subroutine z_gpu_multi_set_dev(x) + implicit none + class(psb_z_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine z_gpu_multi_set_dev + + subroutine z_gpu_multi_set_sync(x) + implicit none + class(psb_z_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine z_gpu_multi_set_sync + + function z_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_gpu_multi_is_dev + + function z_gpu_multi_is_host(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_gpu_multi_is_host + + function z_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_gpu_multi_is_sync + + + function z_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function z_gpu_multi_get_nrows + + function z_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function z_gpu_multi_get_ncols + + function z_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'zGPU' + end function z_gpu_multi_get_fmt + +!!$ function z_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_dpk_) :: res +!!$ complex(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_z_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_z_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_z_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'z_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function z_gpu_multi_dot_v +!!$ +!!$ function z_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ complex(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_dpk_) :: res +!!$ complex(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function z_gpu_multi_dot_a +!!$ +!!$ subroutine z_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_z_base_multivect_type), intent(inout) :: x +!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ complex(psb_dpk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_z_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_z_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_z_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_z_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine z_gpu_multi_mlt_v +!!$ +!!$ subroutine z_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_z_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine z_gpu_multi_mlt_a +!!$ +!!$ subroutine z_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: alpha,beta +!!$ complex(psb_dpk_), intent(in) :: x(:) +!!$ complex(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine z_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine z_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: alpha,beta +!!$ class(psb_z_base_multivect_type), intent(inout) :: x +!!$ class(psb_z_base_multivect_type), intent(inout) :: y +!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_z_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_z_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine z_gpu_multi_mlt_v_2 + + + subroutine z_gpu_multi_set_scal(x,val) + class(psb_z_multivect_gpu), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_z_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine z_gpu_multi_set_scal + + subroutine z_gpu_multi_set_vect(x,val) + class(psb_z_multivect_gpu), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_z_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine z_gpu_multi_set_vect + + + +!!$ subroutine z_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ complex(psb_dpk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_z_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine z_gpu_multi_scal +!!$ +!!$ +!!$ function z_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function z_gpu_multi_nrm2 +!!$ +!!$ function z_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function z_gpu_multi_amax +!!$ +!!$ function z_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function z_gpu_multi_asum + + subroutine z_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=zzero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine z_gpu_multi_all + + subroutine z_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_z_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine z_gpu_multi_zero + + subroutine z_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_z_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc z_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => z_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_z_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_z_hdiag_csmm + procedure, pass(a) :: csmv => psb_z_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_z_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_z_hdiag_scals + ! procedure, pass(a) :: scalv => psb_z_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_z_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_z_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_z_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_hdiag_from_fmt + procedure, pass(a) :: free => z_hdiag_free + procedure, pass(a) :: mold => psb_z_hdiag_mold + procedure, pass(a) :: to_gpu => psb_z_hdiag_to_gpu + final :: z_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_hdiag_mold +#endif + end type psb_z_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_hdiag_get_nzeros, z_hdiag_free, z_hdiag_get_fmt, & + & z_hdiag_get_size, z_hdiag_sizeof, z_hdiag_get_nz_row + + + interface + subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_z_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_z_hdiag_mold(a,b,info) + import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdiag_mold + end interface + + interface + subroutine psb_z_hdiag_to_gpu(a,info) + import :: psb_z_hdiag_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdiag_to_gpu + end interface + + interface + subroutine psb_z_cp_hdiag_from_coo(a,b,info) + import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_z_mv_hdiag_from_coo(a,b,info) + import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_z_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_z_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdiag_scal(d,a,info, side) +!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdiag_scals(d,a,info) +!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function z_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function z_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_hdia_sparse_mat%free() + + return + + end subroutine z_hdiag_free + + subroutine z_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_z_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_hdia_sparse_mat%free() + + return + end subroutine z_hdiag_finalize + +#else + + interface + subroutine psb_z_hdiag_mold(a,b,info) + import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdiag_mold + end interface + +#endif + +end module psb_z_hdiag_mat_mod diff --git a/gpu/psb_z_hlg_mat_mod.F90 b/gpu/psb_z_hlg_mat_mod.F90 new file mode 100644 index 00000000..09d490b3 --- /dev/null +++ b/gpu/psb_z_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_hlg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_hll_sparse_mat) :: psb_z_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_hlg_get_fmt + procedure, pass(a) :: sizeof => z_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_z_hlg_vect_mv + procedure, pass(a) :: csmm => psb_z_hlg_csmm + procedure, pass(a) :: csmv => psb_z_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_hlg_scals + procedure, pass(a) :: scalv => psb_z_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_z_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_hlg_from_fmt + procedure, pass(a) :: free => z_hlg_free + procedure, pass(a) :: mold => psb_z_hlg_mold + procedure, pass(a) :: is_host => z_hlg_is_host + procedure, pass(a) :: is_dev => z_hlg_is_dev + procedure, pass(a) :: is_sync => z_hlg_is_sync + procedure, pass(a) :: set_host => z_hlg_set_host + procedure, pass(a) :: set_dev => z_hlg_set_dev + procedure, pass(a) :: set_sync => z_hlg_set_sync + procedure, pass(a) :: sync => z_hlg_sync + procedure, pass(a) :: from_gpu => psb_z_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_hlg_to_gpu + final :: z_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_hlg_mold +#endif + end type psb_z_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_hlg_get_nzeros, z_hlg_free, z_hlg_get_fmt, & + & z_hlg_get_size, z_hlg_sizeof, z_hlg_get_nz_row + + + interface + subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hlg_vect_mv + end interface + + interface + subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hlg_inner_vect_sv + end interface + + interface + subroutine psb_z_hlg_reallocate_nz(nz,a) + import :: psb_z_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_z_hlg_reallocate_nz + end interface + + interface + subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_z_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hlg_allocate_mnnz + end interface + + interface + subroutine psb_z_hlg_mold(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hlg_mold + end interface + + interface + subroutine psb_z_hlg_from_gpu(a,info) + import :: psb_z_hlg_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hlg_from_gpu + end interface + + interface + subroutine psb_z_hlg_to_gpu(a,info, nzrm) + import :: psb_z_hlg_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_hlg_to_gpu + end interface + + interface + subroutine psb_z_cp_hlg_from_coo(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hlg_from_coo + end interface + + interface + subroutine psb_z_cp_hlg_from_fmt(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hlg_from_fmt + end interface + + interface + subroutine psb_z_mv_hlg_from_coo(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hlg_from_coo + end interface + + + interface + subroutine psb_z_mv_hlg_from_fmt(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hlg_from_fmt + end interface + + interface + subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hlg_csmv + end interface + interface + subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hlg_csmm + end interface + + interface + subroutine psb_z_hlg_scal(d,a,info, side) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_hlg_scal + end interface + + interface + subroutine psb_z_hlg_scals(d,a,info) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hlg_sizeof(a) result(res) + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_hlg_sizeof + + function z_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function z_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_hll_sparse_mat%free() + + return + + end subroutine z_hlg_free + + + subroutine z_hlg_sync(a) + implicit none + class(psb_z_hlg_sparse_mat), target, intent(in) :: a + class(psb_z_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_hlg_sync + + subroutine z_hlg_set_host(a) + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_hlg_set_host + + subroutine z_hlg_set_dev(a) + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_hlg_set_dev + + subroutine z_hlg_set_sync(a) + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_hlg_set_sync + + function z_hlg_is_dev(a) result(res) + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_hlg_is_dev + + function z_hlg_is_host(a) result(res) + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_hlg_is_host + + function z_hlg_is_sync(a) result(res) + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_hlg_is_sync + + + subroutine z_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_z_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_hlg_finalize + +#else + + interface + subroutine psb_z_hlg_mold(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hlg_mold + end interface + +#endif + +end module psb_z_hlg_mat_mod diff --git a/gpu/psb_z_hybg_mat_mod.F90 b/gpu/psb_z_hybg_mat_mod.F90 new file mode 100644 index 00000000..465677e3 --- /dev/null +++ b/gpu/psb_z_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_z_hybg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use cusparse_mod + + type, extends(psb_z_csr_sparse_mat) :: psb_z_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(z_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => z_hybg_get_fmt + procedure, pass(a) :: sizeof => z_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_z_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_hybg_csmm + procedure, pass(a) :: csmv => psb_z_hybg_csmv + procedure, pass(a) :: scals => psb_z_hybg_scals + procedure, pass(a) :: scalv => psb_z_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_z_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_hybg_from_fmt + procedure, pass(a) :: free => z_hybg_free + procedure, pass(a) :: mold => psb_z_hybg_mold + procedure, pass(a) :: to_gpu => psb_z_hybg_to_gpu + final :: z_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_hybg_mold +#endif + end type psb_z_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_hybg_get_nzeros, z_hybg_free, z_hybg_get_fmt, & + & z_hybg_get_size, z_hybg_sizeof, z_hybg_get_nz_row + + + interface + subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hybg_inner_vect_sv + end interface + + interface + subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hybg_vect_mv + end interface + + interface + subroutine psb_z_hybg_reallocate_nz(nz,a) + import :: psb_z_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_z_hybg_reallocate_nz + end interface + + interface + subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_z_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hybg_allocate_mnnz + end interface + + interface + subroutine psb_z_hybg_mold(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hybg_mold + end interface + + interface + subroutine psb_z_hybg_to_gpu(a,info, nzrm) + import :: psb_z_hybg_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_hybg_to_gpu + end interface + + interface + subroutine psb_z_cp_hybg_from_coo(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hybg_from_coo + end interface + + interface + subroutine psb_z_cp_hybg_from_fmt(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hybg_from_fmt + end interface + + interface + subroutine psb_z_mv_hybg_from_coo(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hybg_from_coo + end interface + + interface + subroutine psb_z_mv_hybg_from_fmt(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hybg_from_fmt + end interface + + interface + subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hybg_csmv + end interface + interface + subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hybg_csmm + end interface + + interface + subroutine psb_z_hybg_scal(d,a,info,side) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_hybg_scal + end interface + + interface + subroutine psb_z_hybg_scals(d,a,info) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hybg_sizeof(a) result(res) + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_hybg_sizeof + + function z_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function z_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_hybg_free + + subroutine z_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_z_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine z_hybg_finalize + +#else + + interface + subroutine psb_z_hybg_mold(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hybg_mold + end interface + +#endif + +end module psb_z_hybg_mat_mod +#endif diff --git a/gpu/psb_z_vectordev_mod.F90 b/gpu/psb_z_vectordev_mod.F90 new file mode 100644 index 00000000..58c43a43 --- /dev/null +++ b/gpu/psb_z_vectordev_mod.F90 @@ -0,0 +1,390 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedDoubleComplex(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + complex(c_double_complex), value :: dummy + end function registerMappedDoubleComplex + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceDoubleComplex(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + complex(c_double_complex) :: hostVec(*) + end function writeMultiVecDeviceDoubleComplex + function writeMultiVecDeviceDoubleComplexR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceDoubleComplexR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + complex(c_double_complex) :: hostVec(ld,*) + end function writeMultiVecDeviceDoubleComplexR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceDoubleComplex(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + complex(c_double_complex) :: hostVec(*) + end function readMultiVecDeviceDoubleComplex + function readMultiVecDeviceDoubleComplexR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceDoubleComplexR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + complex(c_double_complex) :: hostVec(ld,*) + end function readMultiVecDeviceDoubleComplexR2 + end interface + + interface allocateDoubleComplex + function allocateDoubleComplex(didx,n) & + & result(res) bind(c,name='allocateDoubleComplex') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateDoubleComplex + function allocateMultiDoubleComplex(didx,m,n) & + & result(res) bind(c,name='allocateMultiDoubleComplex') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiDoubleComplex + end interface + + interface writeDoubleComplex + function writeDoubleComplex(didx,hidx,n) & + & result(res) bind(c,name='writeDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(*) + integer(c_int),value :: n + end function writeDoubleComplex + function writeDoubleComplexFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeDoubleComplexFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeDoubleComplexFirst + function writeMultiDoubleComplex(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiDoubleComplex + end interface + + interface readDoubleComplex + function readDoubleComplex(didx,hidx,n) & + & result(res) bind(c,name='readDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(*) + integer(c_int),value :: n + end function readDoubleComplex + function readDoubleComplexFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readDoubleComplexFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readDoubleComplexFirst + function readMultiDoubleComplex(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiDoubleComplex + end interface + + interface + subroutine freeDoubleComplex(didx) & + & bind(c,name='freeDoubleComplex') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeDoubleComplex + end interface + + + interface setScalDevice + function setScalMultiVecDeviceDoubleComplex(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + complex(c_double_complex), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceDoubleComplex + end interface + + interface + function geinsMultiVecDeviceDoubleComplex(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceDoubleComplex + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceDoubleComplex + end interface + + interface + function igathMultiVecDeviceDoubleComplexVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceDoubleComplexVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceDoubleComplexVecIdx + end interface + + interface + function iscatMultiVecDeviceDoubleComplex(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + complex(c_double_complex),value :: beta + end function iscatMultiVecDeviceDoubleComplex + end interface + + interface + function iscatMultiVecDeviceDoubleComplexVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceDoubleComplexVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + complex(c_double_complex),value :: beta + end function iscatMultiVecDeviceDoubleComplexVecIdx + end interface + + + interface scalMultiVecDevice + function scalMultiVecDeviceDoubleComplex(alpha,deviceVecA) & + & result(val) bind(c,name='scalMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + complex(c_double_complex), value :: alpha + type(c_ptr), value :: deviceVecA + end function scalMultiVecDeviceDoubleComplex + end interface + + interface dotMultiVecDevice + function dotMultiVecDeviceDoubleComplex(res, n,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + complex(c_double_complex) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotMultiVecDeviceDoubleComplex + end interface + + interface nrm2MultiVecDeviceComplex + function nrm2MultiVecDeviceDoubleComplex(res,n,deviceVecA) & + & result(val) bind(c,name='nrm2MultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function nrm2MultiVecDeviceDoubleComplex + end interface + + interface amaxMultiVecDeviceComplex + function amaxMultiVecDeviceDoubleComplex(res,n,deviceVecA) & + & result(val) bind(c,name='amaxMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function amaxMultiVecDeviceDoubleComplex + end interface + + interface asumMultiVecDeviceComplex + function asumMultiVecDeviceDoubleComplex(res,n,deviceVecA) & + & result(val) bind(c,name='asumMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function asumMultiVecDeviceDoubleComplex + end interface + + + interface axpbyMultiVecDevice + function axpbyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,beta,deviceVecB) & + & result(res) bind(c,name='axpbyMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyMultiVecDeviceDoubleComplex + end interface + + interface axyMultiVecDevice + function axyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='axyMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function axyMultiVecDeviceDoubleComplex + end interface + + interface axybzMultiVecDevice + function axybzMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,deviceVecB,beta,deviceVecZ) & + & result(res) bind(c,name='axybzMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB,deviceVecZ + end function axybzMultiVecDeviceDoubleComplex + end interface + + + interface absMultiVecDevice + function absMultiVecDeviceDoubleComplex(n,alpha,deviceVecA) & + & result(res) bind(c,name='absMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha + type(c_ptr), value :: deviceVecA + end function absMultiVecDeviceDoubleComplex + function absMultiVecDeviceDoubleComplex2(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='absMultiVecDeviceDoubleComplex2') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function absMultiVecDeviceDoubleComplex2 + end interface + + interface inner_register + module procedure inner_registerDoubleComplex + end interface + + interface inner_unregister + module procedure inner_unregisterDoubleComplex + end interface + +contains + + + function inner_registerDoubleComplex(buffer,dval) result(res) + complex(c_double_complex), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + complex(c_double_complex) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerDoubleComplex + + subroutine inner_unregisterDoubleComplex(buffer) + complex(c_double_complex), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterDoubleComplex + +#endif + +end module psb_z_vectordev_mod diff --git a/gpu/s_cusparse_mod.F90 b/gpu/s_cusparse_mod.F90 new file mode 100644 index 00000000..6e628fa1 --- /dev/null +++ b/gpu/s_cusparse_mod.F90 @@ -0,0 +1,305 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module s_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: s_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type s_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: s_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type s_Hmat +#endif + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface CSRGDeviceFree + function s_CSRGDeviceFree(Mat) & + & bind(c,name="s_CSRGDeviceFree") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int) :: res + end function s_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function s_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="s_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function s_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="s_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function s_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="s_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function s_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="s_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_CSRGDeviceSetMatIndexBase + end interface + + interface CSRGDeviceCsrsmAnalysis + function s_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="s_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int) :: res + end function s_CSRGDeviceCsrsmAnalysis + end interface + + interface CSRGDeviceAlloc + function s_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="s_CSRGDeviceAlloc") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function s_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function s_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="s_CSRGDeviceGetParms") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function s_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function s_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="s_spsvCSRGDevice") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_float), value :: alpha,beta + integer(c_int) :: res + end function s_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function s_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="s_spmvCSRGDevice") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_float), value :: alpha,beta + integer(c_int) :: res + end function s_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function s_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="s_CSRGHost2Device") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_float) :: val(*) + integer(c_int) :: res + end function s_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function s_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="s_CSRGDevice2Host") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_float) :: val(*) + integer(c_int) :: res + end function s_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface HYBGDeviceAlloc + function s_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="s_HYBGDeviceAlloc") result(res) + use iso_c_binding + import s_hmat + type(s_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function s_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function s_HYBGDeviceFree(Mat) & + & bind(c,name="s_HYBGDeviceFree") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int) :: res + end function s_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function s_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="s_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function s_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="s_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function s_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="s_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function s_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="s_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function s_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="s_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int) :: res + end function s_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function s_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="s_spsvHYBGDevice") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_float), value :: alpha,beta + integer(c_int) :: res + end function s_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function s_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="s_spmvHYBGDevice") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_float), value :: alpha,beta + integer(c_int) :: res + end function s_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function s_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="s_HYBGHost2Device") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_float) :: val(*) + integer(c_int) :: res + end function s_HYBGHost2Device + end interface +#endif + +#endif + +end module s_cusparse_mod diff --git a/gpu/scusparse.c b/gpu/scusparse.c new file mode 100644 index 00000000..70a0cbd7 --- /dev/null +++ b/gpu/scusparse.c @@ -0,0 +1,95 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + +/* Single precision real */ +#define TYPE float +#define CUSPARSE_BASE_TYPE CUDA_R_32F +#define T_CSRGDeviceMat s_CSRGDeviceMat +#define T_Cmat s_Cmat +#define T_spmvCSRGDevice s_spmvCSRGDevice +#define T_spsvCSRGDevice s_spsvCSRGDevice +#define T_CSRGDeviceAlloc s_CSRGDeviceAlloc +#define T_CSRGDeviceFree s_CSRGDeviceFree +#define T_CSRGHost2Device s_CSRGHost2Device +#define T_CSRGDevice2Host s_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode s_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType s_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms s_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis s_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseScsrmv +#define cusparseTcsrsv_solve cusparseScsrsv_solve +#define cusparseTcsrsv_analysis cusparseScsrsv_analysis + +#define T_HYBGDeviceMat s_HYBGDeviceMat +#define T_Hmat s_Hmat +#define T_HYBGDeviceFree s_HYBGDeviceFree +#define T_spmvHYBGDevice s_spmvHYBGDevice +#define T_HYBGDeviceAlloc s_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType s_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase s_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType s_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode s_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis s_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice s_spsvHYBGDevice +#define T_HYBGHost2Device s_HYBGHost2Device +#define cusparseThybmv cusparseShybmv +#define cusparseThybsv_solve cusparseShybsv_solve +#define cusparseThybsv_analysis cusparseShybsv_analysis +#define cusparseTcsr2hyb cusparseScsr2hyb + + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis s_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseScsrsv2_solve +#endif + +#include "fcusparse_fct.h" + +#endif diff --git a/gpu/svectordev.c b/gpu/svectordev.c new file mode 100644 index 00000000..d193a4d8 --- /dev/null +++ b/gpu/svectordev.c @@ -0,0 +1,304 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "svectordev.h" + + +int registerMappedFloat(void *buff, void **d_p, int n, float dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(float)); +} + +int writeMultiVecDeviceFloat(void* deviceVec, float* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(float)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceFloatR2(void* deviceVec, float* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceFloat(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatR2",i); + } + return(i); +} + +int readMultiVecDeviceFloat(void* deviceVec, float* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(float)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i); + } + return(i); +} + +int readMultiVecDeviceFloatR2(void* deviceVec, float* hostVec, int ld) +{ int i; + i = readMultiVecDeviceFloat(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatR2",i); + } + return(i); +} + +int setscalMultiVecDeviceFloat(float val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + float beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0.0; + else if (dupl == INS_ADD) + beta = 1.0; + else + beta = 0.0; + + spgpuSscat(handle, (float *) devVecX->v_, n, (float*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceFloat(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + float *hv = (float *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuSgath(handle,hv, n, idx,indexBase, (float *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, float beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceFloat(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, float beta) +{ int i=0; + float *hv = (float *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuSscat(handle, (float *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, + alpha, (float *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, + float beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuSaxpby(handle,(float*)devVecY->v_+pitch*j, n, beta, + (float*)devVecY->v_+pitch*j, alpha,(float*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSmaxy(handle, (float*)devVecB->v_, n, alpha, (float*)devVecA->v_, + (float*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, + void *deviceVecB, float beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuSmaxypbz(handle, (float*)devVecZ->v_, n, beta, (float*)devVecZ->v_, + alpha, (float*) devVecA->v_, (float*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + +int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSabs(handle, (float*)devVecB->v_, n, alpha, (float*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_gpuGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuSabs(handle, (float*)devVecA->v_, n, alpha, (float*)devVecA->v_); + + return(i); +} + +#endif + diff --git a/gpu/svectordev.h b/gpu/svectordev.h new file mode 100644 index 00000000..1fd4fd11 --- /dev/null +++ b/gpu/svectordev.h @@ -0,0 +1,78 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedFloat(void *, void **, int, float); +int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); +int writeMultiVecDeviceFloatR2(void* deviceMultiVec, float* hostMultiVec, int ld); +int readMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); +int readMultiVecDeviceFloatR2(void* deviceMultiVec, float* hostMultiVec, int ld); + +int setscalMultiVecDeviceFloat(float val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceFloat(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, float beta); +int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, float beta); + +int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA); +int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int amaxMultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); +int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, + void *deviceVecB, float beta, void *deviceVecZ); +int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA); +int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, void *deviceVecB); + + +#endif diff --git a/gpu/vectordev.c b/gpu/vectordev.c new file mode 100644 index 00000000..2b22a8a6 --- /dev/null +++ b/gpu/vectordev.c @@ -0,0 +1,198 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +//new +MultiVectorDeviceParams getMultiVectorDeviceParams(unsigned int count, unsigned int size, + unsigned int elementType) +{ + struct MultiVectorDeviceParams params; + + if (count == 1) + params.pitch = size; + else + if (elementType == SPGPU_TYPE_INT) + { + //fprintf(stderr,"Getting parms for a DOUBLE vector\n"); + params.pitch = (((size*sizeof(int) + 255)/256)*256)/sizeof(int); + } + else if (elementType == SPGPU_TYPE_DOUBLE) + { + //fprintf(stderr,"Getting parms for a DOUBLE vector\n"); + params.pitch = (((size*sizeof(double) + 255)/256)*256)/sizeof(double); + } + else if (elementType == SPGPU_TYPE_FLOAT) + { + params.pitch = (((size*sizeof(float) + 255)/256)*256)/sizeof(float); + } + else if (elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + params.pitch = (((size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex); + } + else if (elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + params.pitch = (((size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex); + } + else + params.pitch = 0; + + params.elementType = elementType; + + params.count = count; + params.size = size; + + return params; + +} +//new +int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *params) +{ + if (params->pitch == 0) + return SPGPU_UNSUPPORTED; // Unsupported params + + struct MultiVectDevice *tmp = (struct MultiVectDevice *)malloc(sizeof(struct MultiVectDevice)); + *remoteMultiVec = (void *)tmp; + tmp->size_ = params->size; + tmp->count_ = params->count; + + if (params->elementType == SPGPU_TYPE_INT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (((params->size*sizeof(int) + 255)/256)*256)/sizeof(int); + //fprintf(stderr,"Allocating an INT vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double)); + + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (((params->size*sizeof(float) + 255)/256)*256)/sizeof(float); + + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(double) + 255)/256)*256)/sizeof(double); + //fprintf(stderr,"Allocating a DOUBLE vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double)); + + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex); + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex); + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return SPGPU_SUCCESS; // Success +} + + +int unregisterMapped(void *buff) +{ + return unregisterMappedMemory(buff); +} + +void freeMultiVecDevice(void* deviceVec) +{ + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // fprintf(stderr,"freeMultiVecDevice\n"); + if (devVec != NULL) { + //fprintf(stderr,"Before freeMultiVecDevice% ld\n",devVec->pitch_*devVec->count_*sizeof(double)); + freeRemoteBuffer(devVec->v_); + free(deviceVec); + } +} + +int FallocMultiVecDevice(void** deviceMultiVec, unsigned int count, + unsigned int size, unsigned int elementType) +{ int i; + struct MultiVectorDeviceParams p; + + p = getMultiVectorDeviceParams(count, size, elementType); + i = allocMultiVecDevice(deviceMultiVec, &p); + //cudaSync(); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d, %d %d \n","FallocMultiVecDevice",i, count, size); + } + return(i); +} + +int getMultiVecDeviceSize(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->size_; + return(i); +} + +int getMultiVecDeviceCount(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->count_; + return(i); +} + +int getMultiVecDevicePitch(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->pitch_; + return(i); +} + +#endif + diff --git a/gpu/vectordev.h b/gpu/vectordev.h new file mode 100644 index 00000000..9739c01b --- /dev/null +++ b/gpu/vectordev.h @@ -0,0 +1,90 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include "cuda_runtime.h" +//#include "common.h" +#include "cintrf.h" +#include + +struct MultiVectDevice +{ + // number of vectors + int count_; + + //number of elements for a single vector + int size_; + + //pithc in number of elements + int pitch_; + + // Vectors in device memory (single allocation) + void *v_; +}; + +typedef struct MultiVectorDeviceParams +{ + // number on vectors + unsigned int count; //1 for a simple vector + + // The resulting allocation will be pitch*s*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Size of a single vector (in number of elements). + unsigned int size; +} MultiVectorDeviceParams; + + +#define INS_OVERWRITE 0 +#define INS_ADD 1 + + +int unregisterMapped(void *); + +MultiVectorDeviceParams getMultiVectorDeviceParams(unsigned int count, + unsigned int size, + unsigned int elementType); + +int FallocMultiVecDevice(void** deviceMultiVec, unsigned count, + unsigned int size, unsigned int elementType); +void freeMultiVecDevice(void* deviceVec); +int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *params); +int getMultiVecDeviceSize(void* deviceVec); +int getMultiVecDeviceCount(void* deviceVec); +int getMultiVecDevicePitch(void* deviceVec); + +#endif diff --git a/gpu/z_cusparse_mod.F90 b/gpu/z_cusparse_mod.F90 new file mode 100644 index 00000000..020f1de5 --- /dev/null +++ b/gpu/z_cusparse_mod.F90 @@ -0,0 +1,305 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module z_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: z_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type z_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: z_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type z_Hmat +#endif + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface CSRGDeviceFree + function z_CSRGDeviceFree(Mat) & + & bind(c,name="z_CSRGDeviceFree") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function z_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function z_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function z_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function z_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatIndexBase + end interface + + interface CSRGDeviceCsrsmAnalysis + function z_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGDeviceCsrsmAnalysis + end interface + + interface CSRGDeviceAlloc + function z_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="z_CSRGDeviceAlloc") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function z_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function z_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="z_CSRGDeviceGetParms") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function z_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function z_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spsvCSRGDevice") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function z_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spmvCSRGDevice") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function z_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_CSRGHost2Device") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function z_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_CSRGDevice2Host") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface HYBGDeviceAlloc + function z_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="z_HYBGDeviceAlloc") result(res) + use iso_c_binding + import z_hmat + type(z_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function z_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function z_HYBGDeviceFree(Mat) & + & bind(c,name="z_HYBGDeviceFree") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int) :: res + end function z_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function z_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function z_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function z_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function z_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function z_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="z_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int) :: res + end function z_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function z_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spsvHYBGDevice") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function z_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spmvHYBGDevice") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function z_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_HYBGHost2Device") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_HYBGHost2Device + end interface +#endif + +#endif + +end module z_cusparse_mod diff --git a/gpu/zcusparse.c b/gpu/zcusparse.c new file mode 100644 index 00000000..3991359a --- /dev/null +++ b/gpu/zcusparse.c @@ -0,0 +1,94 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + +/* Double precision complex */ +#define TYPE double complex +#define CUSPARSE_BASE_TYPE CUDA_C_64F +#define T_CSRGDeviceMat z_CSRGDeviceMat +#define T_Cmat z_Cmat +#define T_spmvCSRGDevice z_spmvCSRGDevice +#define T_spsvCSRGDevice z_spsvCSRGDevice +#define T_CSRGDeviceAlloc z_CSRGDeviceAlloc +#define T_CSRGDeviceFree z_CSRGDeviceFree +#define T_CSRGHost2Device z_CSRGHost2Device +#define T_CSRGDevice2Host z_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode z_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType z_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms z_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis z_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseZcsrmv +#define cusparseTcsrsv_solve cusparseZcsrsv_solve +#define cusparseTcsrsv_analysis cusparseZcsrsv_analysis +#define T_HYBGDeviceMat z_HYBGDeviceMat +#define T_Hmat z_Hmat +#define T_HYBGDeviceFree z_HYBGDeviceFree +#define T_spmvHYBGDevice z_spmvHYBGDevice +#define T_HYBGDeviceAlloc z_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType z_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase z_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType z_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode z_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis z_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice z_spsvHYBGDevice +#define T_HYBGHost2Device z_HYBGHost2Device +#define cusparseThybmv cusparseZhybmv +#define cusparseThybsv_solve cusparseZhybsv_solve +#define cusparseThybsv_analysis cusparseZhybsv_analysis +#define cusparseTcsr2hyb cusparseZcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis z_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve +#endif + +#include "fcusparse_fct.h" + + +#endif diff --git a/gpu/zvectordev.c b/gpu/zvectordev.c new file mode 100644 index 00000000..c245719f --- /dev/null +++ b/gpu/zvectordev.c @@ -0,0 +1,321 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "zvectordev.h" + + +int registerMappedDoubleComplex(void *buff, void **d_p, int n, cuDoubleComplex dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(cuDoubleComplex)); +} + +int writeMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceDoubleComplexR2(void* deviceVec, cuDoubleComplex* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceDoubleComplex(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleComplexR2",i); + } + return(i); +} + +int readMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplex",i); + } + return(i); +} + +int readMultiVecDeviceDoubleComplexR2(void* deviceVec, cuDoubleComplex* hostVec, int ld) +{ int i; + i = readMultiVecDeviceDoubleComplex(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplexR2",i); + } + return(i); +} + +int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + cuDoubleComplex beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + if (dupl == INS_OVERWRITE) + beta = make_cuDoubleComplex(0.0, 0.0); + else if (dupl == INS_ADD) + beta = make_cuDoubleComplex(1.0, 0.0); + else + beta = make_cuDoubleComplex(0.0, 0.0); + + spgpuZscat(handle, (cuDoubleComplex *) devVecX->v_, n, (cuDoubleComplex*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, + first, (void*) devIdx->v_, + hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + cuDoubleComplex *hv = (cuDoubleComplex *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuZgath(handle,hv, n, idx,indexBase, + (cuDoubleComplex *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void *deviceIdx, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta) +{ int i=0; + cuDoubleComplex *hv = (cuDoubleComplex *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuZscat(handle, (cuDoubleComplex *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, + alpha, (cuDoubleComplex *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_, + (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX, + cuDoubleComplex beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuZaxpby(handle,(cuDoubleComplex*)devVecY->v_+pitch*j, n, beta, + (cuDoubleComplex*)devVecY->v_+pitch*j, alpha, + (cuDoubleComplex*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZmaxy(handle, (cuDoubleComplex*)devVecB->v_, n, alpha, + (cuDoubleComplex*)devVecA->v_, + (cuDoubleComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB, cuDoubleComplex beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuZmaxypbz(handle, (cuDoubleComplex*)devVecZ->v_, n, beta, + (cuDoubleComplex*)devVecZ->v_, + alpha, (cuDoubleComplex*) devVecA->v_, (cuDoubleComplex*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + + +int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZabs(handle, (cuDoubleComplex*)devVecB->v_, n, + alpha, (cuDoubleComplex*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_gpuGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuZabs(handle, (cuDoubleComplex*)devVecA->v_, n, + alpha, (cuDoubleComplex*)devVecA->v_); + + return(i); +} + +#endif + diff --git a/gpu/zvectordev.h b/gpu/zvectordev.h new file mode 100644 index 00000000..ca3c966e --- /dev/null +++ b/gpu/zvectordev.h @@ -0,0 +1,91 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex); +int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); +int writeMultiVecDeviceDoubleComplexR2(void* deviceMultiVec, + cuDoubleComplex* hostMultiVec, int ld); +int readMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); +int readMultiVecDeviceDoubleComplexR2(void* deviceMultiVec, + cuDoubleComplex* hostMultiVec, int ld); +int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceDoubleComplex(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, + int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, + int n, int first, + void *deviceIdx, int hfirst, + void* host_values, + int indexBase, cuDoubleComplex beta); +int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta); + +int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA); +int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); +int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); +int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); +int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, + void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX, + cuDoubleComplex beta, void* devVecY); +int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB, cuDoubleComplex beta, + void *deviceVecZ); +int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA); +int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB); + + +#endif From d0d4e458771e50c4af63ef8e1d1f9184a0d46b60 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 29 Sep 2023 13:59:40 +0200 Subject: [PATCH 006/110] Fix for I gpu types from template: use psb_sizeof_ip --- gpu/psb_i_csrg_mat_mod.F90 | 2 +- gpu/psb_i_diag_mat_mod.F90 | 2 +- gpu/psb_i_elg_mat_mod.F90 | 2 +- gpu/psb_i_hlg_mat_mod.F90 | 2 +- gpu/psb_i_hybg_mat_mod.F90 | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gpu/psb_i_csrg_mat_mod.F90 b/gpu/psb_i_csrg_mat_mod.F90 index de25370f..9a4a3852 100644 --- a/gpu/psb_i_csrg_mat_mod.F90 +++ b/gpu/psb_i_csrg_mat_mod.F90 @@ -258,7 +258,7 @@ contains integer(psb_epk_) :: res if (a%is_dev()) call a%sync() res = 8 - res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%val) res = res + psb_sizeof_ip * size(a%irp) res = res + psb_sizeof_ip * size(a%ja) ! Should we account for the shadow data structure diff --git a/gpu/psb_i_diag_mat_mod.F90 b/gpu/psb_i_diag_mat_mod.F90 index 3559c09a..b54ee8d5 100644 --- a/gpu/psb_i_diag_mat_mod.F90 +++ b/gpu/psb_i_diag_mat_mod.F90 @@ -236,7 +236,7 @@ contains integer(psb_epk_) :: res res = 8 - res = res + psb_sizeof_int * size(a%data) + res = res + psb_sizeof_ip * size(a%data) res = res + psb_sizeof_ip * size(a%offset) ! Should we account for the shadow data structure diff --git a/gpu/psb_i_elg_mat_mod.F90 b/gpu/psb_i_elg_mat_mod.F90 index afc71662..a421e611 100644 --- a/gpu/psb_i_elg_mat_mod.F90 +++ b/gpu/psb_i_elg_mat_mod.F90 @@ -294,7 +294,7 @@ contains if (a%is_dev()) call a%sync() res = 8 - res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%val) res = res + psb_sizeof_ip * size(a%irn) res = res + psb_sizeof_ip * size(a%idiag) res = res + psb_sizeof_ip * size(a%ja) diff --git a/gpu/psb_i_hlg_mat_mod.F90 b/gpu/psb_i_hlg_mat_mod.F90 index 92917d47..2ec881ce 100644 --- a/gpu/psb_i_hlg_mat_mod.F90 +++ b/gpu/psb_i_hlg_mat_mod.F90 @@ -260,7 +260,7 @@ contains if (a%is_dev()) call a%sync() res = 8 - res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%val) res = res + psb_sizeof_ip * size(a%irn) res = res + psb_sizeof_ip * size(a%idiag) res = res + psb_sizeof_ip * size(a%hkoffs) diff --git a/gpu/psb_i_hybg_mat_mod.F90 b/gpu/psb_i_hybg_mat_mod.F90 index 9e682365..388a8801 100644 --- a/gpu/psb_i_hybg_mat_mod.F90 +++ b/gpu/psb_i_hybg_mat_mod.F90 @@ -236,7 +236,7 @@ contains class(psb_i_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 - res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%val) res = res + psb_sizeof_ip * size(a%irp) res = res + psb_sizeof_ip * size(a%ja) ! Should we account for the shadow data structure From 81e9121c910213c82ff1af15de0dbabd27e738d2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 2 Oct 2023 13:08:56 +0200 Subject: [PATCH 007/110] Add GPULDLIBS into Make.inc (and fix configry) --- Make.inc.in | 2 ++ configure | 3 +++ configure.ac | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Make.inc.in b/Make.inc.in index e28e9bee..f0b21f91 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -69,6 +69,7 @@ CBINDLIBNAME=libpsb_cbind.a GPUD=@GPUD@ GPULD=@GPULD@ +LGPU=@LGPU@ SPGPUDIR=@SPGPU_DIR@ SPGPU_INCDIR=@SPGPU_INCDIR@ @@ -92,3 +93,4 @@ CUDEFINES=@CUDEFINES@ @PSBLASRULES@ +PSBGPULDLIBS=$(LGPU) $(SPGPU_LIBS) $(CUDA_LIBS) $(LIBS) \ No newline at end of file diff --git a/configure b/configure index 5c3444b3..d2fa80d2 100755 --- a/configure +++ b/configure @@ -653,6 +653,7 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS +LGPU GPULD GPUD CUDEFINES @@ -11020,6 +11021,7 @@ CPPFLAGS="$SAVE_CPPFLAGS" if test "x$pac_cv_have_spgpu" == "xyes" ; then GPUD=gpud; GPULD=gpuld; + LGPU="-lpsb_gpu"; EXTRALDLIBS="-lstdc++"; fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $GPUD $GPULD" >&5 @@ -11161,6 +11163,7 @@ FDEFINES=$(PSBFDEFINES) + ############################################################################### diff --git a/configure.ac b/configure.ac index 4b7f82e1..cb23c6ac 100755 --- a/configure.ac +++ b/configure.ac @@ -801,7 +801,8 @@ CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); PAC_CHECK_SPGPU() if test "x$pac_cv_have_spgpu" == "xyes" ; then GPUD=gpud; - GPULD=gpuld; + GPULD=gpuld; + LGPU="-lpsb_gpu"; EXTRALDLIBS="-lstdc++"; fi AC_MSG_NOTICE([At this point GPUTARGET is $GPUD $GPULD]) @@ -933,6 +934,7 @@ AC_SUBST(CUDA_NVCC) AC_SUBST(CUDEFINES) AC_SUBST(GPUD) AC_SUBST(GPULD) +AC_SUBST(LGPU) ############################################################################### # the following files will be created by Automake From 273233691545139fae7680debffcf6870d03a1fc Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 16 Oct 2023 16:15:00 +0200 Subject: [PATCH 008/110] Fix gpu/makefile --- gpu/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu/Makefile b/gpu/Makefile index 16e9c084..10e17d55 100755 --- a/gpu/Makefile +++ b/gpu/Makefile @@ -118,7 +118,7 @@ cudaobjs: $(FOBJS) ilib: objs $(MAKE) -C impl lib LIBNAME=$(LIBNAME) -cudalib: objs +cudalib: objs ilib $(MAKE) -C CUDA lib LIBNAME=$(LIBNAME) clean: cclean iclean cudaclean From 6aa7987d52240ec9db6b5346cacc7bb80cb78c7b Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 19 Oct 2023 14:22:32 +0200 Subject: [PATCH 009/110] Rename GPU into cuda, and merge SPGPU code. --- Make.inc.in | 14 +- Makefile | 18 +- configure | 210 +-- configure.ac | 40 +- {gpu => cuda}/CUDA/Makefile | 3 +- {gpu => cuda}/CUDA/psi_cuda_CopyCooToElg.cuh | 0 {gpu => cuda}/CUDA/psi_cuda_CopyCooToHlg.cuh | 0 {gpu => cuda}/CUDA/psi_cuda_c_CopyCooToElg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_c_CopyCooToHlg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_common.cuh | 0 {gpu => cuda}/CUDA/psi_cuda_d_CopyCooToElg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_d_CopyCooToHlg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_s_CopyCooToElg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_s_CopyCooToHlg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_z_CopyCooToElg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_z_CopyCooToHlg.cu | 0 {gpu => cuda}/Makefile | 19 +- {gpu => cuda}/base_cusparse_mod.F90 | 0 {gpu => cuda}/c_cusparse_mod.F90 | 0 {gpu => cuda}/ccusparse.c | 0 {gpu => cuda}/cintrf.h | 0 {gpu => cuda}/core_mod.f90 | 0 {gpu => cuda}/cuda_util.c | 0 {gpu => cuda}/cuda_util.h | 0 {gpu => cuda}/cusparse_mod.F90 | 0 {gpu => cuda}/cvectordev.c | 0 {gpu => cuda}/cvectordev.h | 0 {gpu => cuda}/d_cusparse_mod.F90 | 0 {gpu => cuda}/dcusparse.c | 0 {gpu => cuda}/diagdev.c | 0 {gpu => cuda}/diagdev.h | 0 {gpu => cuda}/diagdev_mod.F90 | 0 {gpu => cuda}/dnsdev.c | 0 {gpu => cuda}/dnsdev.h | 0 {gpu => cuda}/dnsdev_mod.F90 | 0 {gpu => cuda}/dvectordev.c | 0 {gpu => cuda}/dvectordev.h | 0 {gpu => cuda}/elldev.c | 0 {gpu => cuda}/elldev.h | 0 {gpu => cuda}/elldev_mod.F90 | 0 {gpu => cuda}/fcusparse.c | 0 {gpu => cuda}/fcusparse.h | 0 {gpu => cuda}/fcusparse_fct.h | 0 {gpu => cuda}/hdiagdev.c | 0 {gpu => cuda}/hdiagdev.h | 0 {gpu => cuda}/hdiagdev_mod.F90 | 0 {gpu => cuda}/hlldev.c | 0 {gpu => cuda}/hlldev.h | 0 {gpu => cuda}/hlldev_mod.F90 | 0 {gpu => cuda}/impl/Makefile | 3 + {gpu => cuda}/impl/psb_c_cp_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_c_cp_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_elg_from_fmt.F90 | 0 .../impl/psb_c_cp_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_c_cp_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_hybg_from_fmt.F90 | 0 .../impl/psb_c_csrg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_csmm.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_from_gpu.F90 | 0 .../impl/psb_c_csrg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_mold.F90 | 0 .../impl/psb_c_csrg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_scal.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_scals.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_c_diag_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_diag_mold.F90 | 0 {gpu => cuda}/impl/psb_c_diag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_diag_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_c_dnsg_mat_impl.F90 | 0 .../impl/psb_c_elg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_c_elg_asb.f90 | 0 {gpu => cuda}/impl/psb_c_elg_csmm.F90 | 0 {gpu => cuda}/impl/psb_c_elg_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_elg_csput.F90 | 0 {gpu => cuda}/impl/psb_c_elg_from_gpu.F90 | 0 .../impl/psb_c_elg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_c_elg_mold.F90 | 0 .../impl/psb_c_elg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_c_elg_scal.F90 | 0 {gpu => cuda}/impl/psb_c_elg_scals.F90 | 0 {gpu => cuda}/impl/psb_c_elg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_elg_trim.f90 | 0 {gpu => cuda}/impl/psb_c_elg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_c_hdiag_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_hdiag_mold.F90 | 0 {gpu => cuda}/impl/psb_c_hdiag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_hdiag_vect_mv.F90 | 0 .../impl/psb_c_hlg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_csmm.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_from_gpu.F90 | 0 .../impl/psb_c_hlg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_mold.F90 | 0 .../impl/psb_c_hlg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_scal.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_scals.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_vect_mv.F90 | 0 .../impl/psb_c_hybg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_csmm.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_csmv.F90 | 0 .../impl/psb_c_hybg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_mold.F90 | 0 .../impl/psb_c_hybg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_scal.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_scals.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_c_mv_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_c_mv_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_elg_from_fmt.F90 | 0 .../impl/psb_c_mv_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_c_mv_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_hybg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_cp_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_cp_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_elg_from_fmt.F90 | 0 .../impl/psb_d_cp_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_cp_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_hybg_from_fmt.F90 | 0 .../impl/psb_d_csrg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_csmm.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_from_gpu.F90 | 0 .../impl/psb_d_csrg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_mold.F90 | 0 .../impl/psb_d_csrg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_scal.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_scals.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_d_diag_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_diag_mold.F90 | 0 {gpu => cuda}/impl/psb_d_diag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_diag_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_d_dnsg_mat_impl.F90 | 0 .../impl/psb_d_elg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_d_elg_asb.f90 | 0 {gpu => cuda}/impl/psb_d_elg_csmm.F90 | 0 {gpu => cuda}/impl/psb_d_elg_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_elg_csput.F90 | 0 {gpu => cuda}/impl/psb_d_elg_from_gpu.F90 | 0 .../impl/psb_d_elg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_d_elg_mold.F90 | 0 .../impl/psb_d_elg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_d_elg_scal.F90 | 0 {gpu => cuda}/impl/psb_d_elg_scals.F90 | 0 {gpu => cuda}/impl/psb_d_elg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_elg_trim.f90 | 0 {gpu => cuda}/impl/psb_d_elg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_d_hdiag_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_hdiag_mold.F90 | 0 {gpu => cuda}/impl/psb_d_hdiag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_hdiag_vect_mv.F90 | 0 .../impl/psb_d_hlg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_csmm.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_from_gpu.F90 | 0 .../impl/psb_d_hlg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_mold.F90 | 0 .../impl/psb_d_hlg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_scal.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_scals.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_vect_mv.F90 | 0 .../impl/psb_d_hybg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_csmm.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_csmv.F90 | 0 .../impl/psb_d_hybg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_mold.F90 | 0 .../impl/psb_d_hybg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_scal.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_scals.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_d_mv_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_mv_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_elg_from_fmt.F90 | 0 .../impl/psb_d_mv_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_mv_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_hybg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_cp_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_cp_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_elg_from_fmt.F90 | 0 .../impl/psb_s_cp_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_cp_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_hybg_from_fmt.F90 | 0 .../impl/psb_s_csrg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_csmm.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_from_gpu.F90 | 0 .../impl/psb_s_csrg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_mold.F90 | 0 .../impl/psb_s_csrg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_scal.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_scals.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_s_diag_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_diag_mold.F90 | 0 {gpu => cuda}/impl/psb_s_diag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_diag_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_s_dnsg_mat_impl.F90 | 0 .../impl/psb_s_elg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_s_elg_asb.f90 | 0 {gpu => cuda}/impl/psb_s_elg_csmm.F90 | 0 {gpu => cuda}/impl/psb_s_elg_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_elg_csput.F90 | 0 {gpu => cuda}/impl/psb_s_elg_from_gpu.F90 | 0 .../impl/psb_s_elg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_s_elg_mold.F90 | 0 .../impl/psb_s_elg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_s_elg_scal.F90 | 0 {gpu => cuda}/impl/psb_s_elg_scals.F90 | 0 {gpu => cuda}/impl/psb_s_elg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_elg_trim.f90 | 0 {gpu => cuda}/impl/psb_s_elg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_s_hdiag_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_hdiag_mold.F90 | 0 {gpu => cuda}/impl/psb_s_hdiag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_hdiag_vect_mv.F90 | 0 .../impl/psb_s_hlg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_csmm.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_from_gpu.F90 | 0 .../impl/psb_s_hlg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_mold.F90 | 0 .../impl/psb_s_hlg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_scal.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_scals.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_vect_mv.F90 | 0 .../impl/psb_s_hybg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_csmm.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_csmv.F90 | 0 .../impl/psb_s_hybg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_mold.F90 | 0 .../impl/psb_s_hybg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_scal.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_scals.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_s_mv_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_mv_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_elg_from_fmt.F90 | 0 .../impl/psb_s_mv_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_mv_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_hybg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_cp_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_cp_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_elg_from_fmt.F90 | 0 .../impl/psb_z_cp_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_cp_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_hybg_from_fmt.F90 | 0 .../impl/psb_z_csrg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_csmm.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_from_gpu.F90 | 0 .../impl/psb_z_csrg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_mold.F90 | 0 .../impl/psb_z_csrg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_scal.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_scals.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_z_diag_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_diag_mold.F90 | 0 {gpu => cuda}/impl/psb_z_diag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_diag_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_z_dnsg_mat_impl.F90 | 0 .../impl/psb_z_elg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_z_elg_asb.f90 | 0 {gpu => cuda}/impl/psb_z_elg_csmm.F90 | 0 {gpu => cuda}/impl/psb_z_elg_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_elg_csput.F90 | 0 {gpu => cuda}/impl/psb_z_elg_from_gpu.F90 | 0 .../impl/psb_z_elg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_z_elg_mold.F90 | 0 .../impl/psb_z_elg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_z_elg_scal.F90 | 0 {gpu => cuda}/impl/psb_z_elg_scals.F90 | 0 {gpu => cuda}/impl/psb_z_elg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_elg_trim.f90 | 0 {gpu => cuda}/impl/psb_z_elg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_z_hdiag_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_hdiag_mold.F90 | 0 {gpu => cuda}/impl/psb_z_hdiag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_hdiag_vect_mv.F90 | 0 .../impl/psb_z_hlg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_csmm.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_from_gpu.F90 | 0 .../impl/psb_z_hlg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_mold.F90 | 0 .../impl/psb_z_hlg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_scal.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_scals.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_vect_mv.F90 | 0 .../impl/psb_z_hybg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_csmm.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_csmv.F90 | 0 .../impl/psb_z_hybg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_mold.F90 | 0 .../impl/psb_z_hybg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_scal.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_scals.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_z_mv_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_mv_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_elg_from_fmt.F90 | 0 .../impl/psb_z_mv_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_mv_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_hybg_from_fmt.F90 | 0 {gpu => cuda}/ivectordev.c | 0 {gpu => cuda}/ivectordev.h | 0 {gpu => cuda}/psb_base_vectordev_mod.F90 | 0 {gpu => cuda}/psb_c_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_c_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_c_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_c_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_vectordev_mod.F90 | 0 {gpu => cuda}/psb_d_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_d_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_d_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_d_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_vectordev_mod.F90 | 0 {gpu => cuda}/psb_gpu_env_mod.F90 | 0 {gpu => cuda}/psb_gpu_mod.F90 | 0 {gpu => cuda}/psb_i_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_i_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_i_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_i_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_vectordev_mod.F90 | 0 {gpu => cuda}/psb_s_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_s_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_s_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_s_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_vectordev_mod.F90 | 0 {gpu => cuda}/psb_vectordev_mod.f90 | 0 {gpu => cuda}/psb_z_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_z_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_z_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_z_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_vectordev_mod.F90 | 0 {gpu => cuda}/s_cusparse_mod.F90 | 0 {gpu => cuda}/scusparse.c | 0 cuda/spgpu/Makefile | 39 + cuda/spgpu/coo.cpp | 98 ++ cuda/spgpu/coo_conv.h | 40 + cuda/spgpu/core.c | 109 ++ cuda/spgpu/core.h | 185 +++ cuda/spgpu/debug.h | 58 + cuda/spgpu/dia.c | 105 ++ cuda/spgpu/dia.h | 158 +++ cuda/spgpu/dia_conv.h | 66 + cuda/spgpu/ell.c | 202 +++ cuda/spgpu/ell.h | 321 +++++ cuda/spgpu/ell_conv.h | 110 ++ cuda/spgpu/hdia.cpp | 374 +++++ cuda/spgpu/hdia.h | 159 +++ cuda/spgpu/hdia_conv.h | 102 ++ cuda/spgpu/hell.c | 104 ++ cuda/spgpu/hell.h | 186 +++ cuda/spgpu/hell_conv.h | 80 ++ cuda/spgpu/kernels/Makefile | 33 + cuda/spgpu/kernels/abs_base.cuh | 110 ++ cuda/spgpu/kernels/amax_base.cuh | 233 ++++ cuda/spgpu/kernels/asum_base.cuh | 240 ++++ cuda/spgpu/kernels/axy_base.cuh | 210 +++ cuda/spgpu/kernels/cabs.cu | 33 + cuda/spgpu/kernels/camax.cu | 31 + cuda/spgpu/kernels/casum.cu | 31 + cuda/spgpu/kernels/caxpby.cu | 103 ++ cuda/spgpu/kernels/caxy.cu | 31 + cuda/spgpu/kernels/cdot.cu | 160 +++ cuda/spgpu/kernels/cgath.cu | 31 + cuda/spgpu/kernels/cnrm2.cu | 169 +++ cuda/spgpu/kernels/cscal.cu | 31 + cuda/spgpu/kernels/cscat.cu | 31 + cuda/spgpu/kernels/csetscal.cu | 31 + cuda/spgpu/kernels/cudadebug.h | 35 + cuda/spgpu/kernels/cudalang.h | 70 + cuda/spgpu/kernels/dabs.cu | 33 + cuda/spgpu/kernels/damax.cu | 32 + cuda/spgpu/kernels/dasum.cu | 32 + cuda/spgpu/kernels/daxpby.cu | 101 ++ cuda/spgpu/kernels/daxy.cu | 32 + cuda/spgpu/kernels/ddot.cu | 160 +++ cuda/spgpu/kernels/dgath.cu | 31 + cuda/spgpu/kernels/dia_cspmv.cu | 33 + cuda/spgpu/kernels/dia_dspmv.cu | 32 + cuda/spgpu/kernels/dia_spmv_base.cuh | 151 ++ cuda/spgpu/kernels/dia_spmv_base_template.cuh | 217 +++ cuda/spgpu/kernels/dia_sspmv.cu | 32 + cuda/spgpu/kernels/dia_zspmv.cu | 33 + cuda/spgpu/kernels/dnrm2.cu | 157 +++ cuda/spgpu/kernels/dscal.cu | 31 + cuda/spgpu/kernels/dscat.cu | 31 + cuda/spgpu/kernels/dsetscal.cu | 31 + cuda/spgpu/kernels/ell_ccsput.cu | 32 + cuda/spgpu/kernels/ell_cspmv.cu | 33 + cuda/spgpu/kernels/ell_csput_base.cuh | 126 ++ cuda/spgpu/kernels/ell_dcsput.cu | 31 + cuda/spgpu/kernels/ell_dspmv.cu | 32 + cuda/spgpu/kernels/ell_scsput.cu | 31 + cuda/spgpu/kernels/ell_spmv_base.cuh | 154 +++ cuda/spgpu/kernels/ell_spmv_base_nors.cuh | 340 +++++ cuda/spgpu/kernels/ell_spmv_base_template.cuh | 426 ++++++ cuda/spgpu/kernels/ell_sspmv.cu | 32 + cuda/spgpu/kernels/ell_zcsput.cu | 32 + cuda/spgpu/kernels/ell_zspmv.cu | 33 + cuda/spgpu/kernels/gath_base.cuh | 86 ++ cuda/spgpu/kernels/hdia_cspmv.cu | 32 + cuda/spgpu/kernels/hdia_dspmv.cu | 33 + cuda/spgpu/kernels/hdia_spmv_base.cuh | 149 ++ .../spgpu/kernels/hdia_spmv_base_template.cuh | 253 ++++ cuda/spgpu/kernels/hdia_sspmv.cu | 32 + cuda/spgpu/kernels/hdia_zspmv.cu | 33 + cuda/spgpu/kernels/hell_cspmv.cu | 32 + cuda/spgpu/kernels/hell_dspmv.cu | 32 + cuda/spgpu/kernels/hell_spmv_base.cuh | 159 +++ .../spgpu/kernels/hell_spmv_base_template.cuh | 357 +++++ cuda/spgpu/kernels/hell_sspmv.cu | 32 + cuda/spgpu/kernels/hell_zspmv.cu | 33 + cuda/spgpu/kernels/igath.cu | 31 + cuda/spgpu/kernels/iscat.cu | 31 + cuda/spgpu/kernels/isetscal.cu | 31 + cuda/spgpu/kernels/mathbase.cuh | 53 + cuda/spgpu/kernels/sabs.cu | 33 + cuda/spgpu/kernels/samax.cu | 32 + cuda/spgpu/kernels/sasum.cu | 32 + cuda/spgpu/kernels/saxpby.cu | 100 ++ cuda/spgpu/kernels/saxy.cu | 32 + cuda/spgpu/kernels/scal_base.cuh | 83 ++ cuda/spgpu/kernels/scat_base.cuh | 89 ++ cuda/spgpu/kernels/sdot.cu | 175 +++ cuda/spgpu/kernels/setscal_base.cuh | 82 ++ cuda/spgpu/kernels/sgath.cu | 31 + cuda/spgpu/kernels/snrm2.cu | 166 +++ cuda/spgpu/kernels/sscal.cu | 31 + cuda/spgpu/kernels/sscat.cu | 31 + cuda/spgpu/kernels/ssetscal.cu | 31 + cuda/spgpu/kernels/zabs.cu | 33 + cuda/spgpu/kernels/zamax.cu | 31 + cuda/spgpu/kernels/zasum.cu | 31 + cuda/spgpu/kernels/zaxpby.cu | 103 ++ cuda/spgpu/kernels/zaxy.cu | 31 + cuda/spgpu/kernels/zdot.cu | 148 ++ cuda/spgpu/kernels/zgath.cu | 31 + cuda/spgpu/kernels/znrm2.cu | 159 +++ cuda/spgpu/kernels/zscal.cu | 31 + cuda/spgpu/kernels/zscat.cu | 31 + cuda/spgpu/kernels/zsetscal.cu | 31 + cuda/spgpu/vector.h | 1231 +++++++++++++++++ {gpu => cuda}/svectordev.c | 0 {gpu => cuda}/svectordev.h | 0 {gpu => cuda}/vectordev.c | 0 {gpu => cuda}/vectordev.h | 0 {gpu => cuda}/z_cusparse_mod.F90 | 0 {gpu => cuda}/zcusparse.c | 0 {gpu => cuda}/zvectordev.c | 0 {gpu => cuda}/zvectordev.h | 0 test/gpukern/Makefile | 62 + test/gpukern/c_file_spmv.F90 | 491 +++++++ test/gpukern/d_file_spmv.F90 | 496 +++++++ test/gpukern/data_input.f90 | 221 +++ test/gpukern/dpdegenmv.F90 | 997 +++++++++++++ test/gpukern/s_file_spmv.F90 | 496 +++++++ test/gpukern/spdegenmv.F90 | 989 +++++++++++++ test/gpukern/z_file_spmv.F90 | 491 +++++++ 527 files changed, 15146 insertions(+), 232 deletions(-) rename {gpu => cuda}/CUDA/Makefile (93%) rename {gpu => cuda}/CUDA/psi_cuda_CopyCooToElg.cuh (100%) rename {gpu => cuda}/CUDA/psi_cuda_CopyCooToHlg.cuh (100%) rename {gpu => cuda}/CUDA/psi_cuda_c_CopyCooToElg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_c_CopyCooToHlg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_common.cuh (100%) rename {gpu => cuda}/CUDA/psi_cuda_d_CopyCooToElg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_d_CopyCooToHlg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_s_CopyCooToElg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_s_CopyCooToHlg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_z_CopyCooToElg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_z_CopyCooToHlg.cu (100%) rename {gpu => cuda}/Makefile (93%) rename {gpu => cuda}/base_cusparse_mod.F90 (100%) rename {gpu => cuda}/c_cusparse_mod.F90 (100%) rename {gpu => cuda}/ccusparse.c (100%) rename {gpu => cuda}/cintrf.h (100%) rename {gpu => cuda}/core_mod.f90 (100%) rename {gpu => cuda}/cuda_util.c (100%) rename {gpu => cuda}/cuda_util.h (100%) rename {gpu => cuda}/cusparse_mod.F90 (100%) rename {gpu => cuda}/cvectordev.c (100%) rename {gpu => cuda}/cvectordev.h (100%) rename {gpu => cuda}/d_cusparse_mod.F90 (100%) rename {gpu => cuda}/dcusparse.c (100%) rename {gpu => cuda}/diagdev.c (100%) rename {gpu => cuda}/diagdev.h (100%) rename {gpu => cuda}/diagdev_mod.F90 (100%) rename {gpu => cuda}/dnsdev.c (100%) rename {gpu => cuda}/dnsdev.h (100%) rename {gpu => cuda}/dnsdev_mod.F90 (100%) rename {gpu => cuda}/dvectordev.c (100%) rename {gpu => cuda}/dvectordev.h (100%) rename {gpu => cuda}/elldev.c (100%) rename {gpu => cuda}/elldev.h (100%) rename {gpu => cuda}/elldev_mod.F90 (100%) rename {gpu => cuda}/fcusparse.c (100%) rename {gpu => cuda}/fcusparse.h (100%) rename {gpu => cuda}/fcusparse_fct.h (100%) rename {gpu => cuda}/hdiagdev.c (100%) rename {gpu => cuda}/hdiagdev.h (100%) rename {gpu => cuda}/hdiagdev_mod.F90 (100%) rename {gpu => cuda}/hlldev.c (100%) rename {gpu => cuda}/hlldev.h (100%) rename {gpu => cuda}/hlldev_mod.F90 (100%) rename {gpu => cuda}/impl/Makefile (97%) rename {gpu => cuda}/impl/psb_c_cp_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_diag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_diag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_diag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_diag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_dnsg_mat_impl.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_asb.f90 (100%) rename {gpu => cuda}/impl/psb_c_elg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_csput.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_trim.f90 (100%) rename {gpu => cuda}/impl/psb_c_elg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hdiag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hdiag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_hdiag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_hdiag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_diag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_diag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_diag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_diag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_dnsg_mat_impl.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_asb.f90 (100%) rename {gpu => cuda}/impl/psb_d_elg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_csput.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_trim.f90 (100%) rename {gpu => cuda}/impl/psb_d_elg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hdiag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hdiag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_hdiag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_hdiag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_diag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_diag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_diag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_diag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_dnsg_mat_impl.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_asb.f90 (100%) rename {gpu => cuda}/impl/psb_s_elg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_csput.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_trim.f90 (100%) rename {gpu => cuda}/impl/psb_s_elg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hdiag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hdiag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_hdiag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_hdiag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_diag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_diag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_diag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_diag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_dnsg_mat_impl.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_asb.f90 (100%) rename {gpu => cuda}/impl/psb_z_elg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_csput.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_trim.f90 (100%) rename {gpu => cuda}/impl/psb_z_elg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hdiag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hdiag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_hdiag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_hdiag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/ivectordev.c (100%) rename {gpu => cuda}/ivectordev.h (100%) rename {gpu => cuda}/psb_base_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_c_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_c_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_d_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_d_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_gpu_env_mod.F90 (100%) rename {gpu => cuda}/psb_gpu_mod.F90 (100%) rename {gpu => cuda}/psb_i_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_i_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_s_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_s_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_vectordev_mod.f90 (100%) rename {gpu => cuda}/psb_z_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_z_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_vectordev_mod.F90 (100%) rename {gpu => cuda}/s_cusparse_mod.F90 (100%) rename {gpu => cuda}/scusparse.c (100%) create mode 100644 cuda/spgpu/Makefile create mode 100644 cuda/spgpu/coo.cpp create mode 100644 cuda/spgpu/coo_conv.h create mode 100644 cuda/spgpu/core.c create mode 100644 cuda/spgpu/core.h create mode 100644 cuda/spgpu/debug.h create mode 100644 cuda/spgpu/dia.c create mode 100644 cuda/spgpu/dia.h create mode 100644 cuda/spgpu/dia_conv.h create mode 100644 cuda/spgpu/ell.c create mode 100644 cuda/spgpu/ell.h create mode 100644 cuda/spgpu/ell_conv.h create mode 100644 cuda/spgpu/hdia.cpp create mode 100644 cuda/spgpu/hdia.h create mode 100644 cuda/spgpu/hdia_conv.h create mode 100644 cuda/spgpu/hell.c create mode 100644 cuda/spgpu/hell.h create mode 100644 cuda/spgpu/hell_conv.h create mode 100644 cuda/spgpu/kernels/Makefile create mode 100644 cuda/spgpu/kernels/abs_base.cuh create mode 100644 cuda/spgpu/kernels/amax_base.cuh create mode 100644 cuda/spgpu/kernels/asum_base.cuh create mode 100644 cuda/spgpu/kernels/axy_base.cuh create mode 100644 cuda/spgpu/kernels/cabs.cu create mode 100644 cuda/spgpu/kernels/camax.cu create mode 100644 cuda/spgpu/kernels/casum.cu create mode 100644 cuda/spgpu/kernels/caxpby.cu create mode 100644 cuda/spgpu/kernels/caxy.cu create mode 100644 cuda/spgpu/kernels/cdot.cu create mode 100644 cuda/spgpu/kernels/cgath.cu create mode 100644 cuda/spgpu/kernels/cnrm2.cu create mode 100644 cuda/spgpu/kernels/cscal.cu create mode 100644 cuda/spgpu/kernels/cscat.cu create mode 100644 cuda/spgpu/kernels/csetscal.cu create mode 100644 cuda/spgpu/kernels/cudadebug.h create mode 100644 cuda/spgpu/kernels/cudalang.h create mode 100644 cuda/spgpu/kernels/dabs.cu create mode 100644 cuda/spgpu/kernels/damax.cu create mode 100644 cuda/spgpu/kernels/dasum.cu create mode 100644 cuda/spgpu/kernels/daxpby.cu create mode 100644 cuda/spgpu/kernels/daxy.cu create mode 100644 cuda/spgpu/kernels/ddot.cu create mode 100644 cuda/spgpu/kernels/dgath.cu create mode 100644 cuda/spgpu/kernels/dia_cspmv.cu create mode 100644 cuda/spgpu/kernels/dia_dspmv.cu create mode 100644 cuda/spgpu/kernels/dia_spmv_base.cuh create mode 100644 cuda/spgpu/kernels/dia_spmv_base_template.cuh create mode 100644 cuda/spgpu/kernels/dia_sspmv.cu create mode 100644 cuda/spgpu/kernels/dia_zspmv.cu create mode 100644 cuda/spgpu/kernels/dnrm2.cu create mode 100644 cuda/spgpu/kernels/dscal.cu create mode 100644 cuda/spgpu/kernels/dscat.cu create mode 100644 cuda/spgpu/kernels/dsetscal.cu create mode 100644 cuda/spgpu/kernels/ell_ccsput.cu create mode 100644 cuda/spgpu/kernels/ell_cspmv.cu create mode 100644 cuda/spgpu/kernels/ell_csput_base.cuh create mode 100644 cuda/spgpu/kernels/ell_dcsput.cu create mode 100644 cuda/spgpu/kernels/ell_dspmv.cu create mode 100644 cuda/spgpu/kernels/ell_scsput.cu create mode 100644 cuda/spgpu/kernels/ell_spmv_base.cuh create mode 100644 cuda/spgpu/kernels/ell_spmv_base_nors.cuh create mode 100644 cuda/spgpu/kernels/ell_spmv_base_template.cuh create mode 100644 cuda/spgpu/kernels/ell_sspmv.cu create mode 100644 cuda/spgpu/kernels/ell_zcsput.cu create mode 100644 cuda/spgpu/kernels/ell_zspmv.cu create mode 100644 cuda/spgpu/kernels/gath_base.cuh create mode 100644 cuda/spgpu/kernels/hdia_cspmv.cu create mode 100644 cuda/spgpu/kernels/hdia_dspmv.cu create mode 100644 cuda/spgpu/kernels/hdia_spmv_base.cuh create mode 100644 cuda/spgpu/kernels/hdia_spmv_base_template.cuh create mode 100644 cuda/spgpu/kernels/hdia_sspmv.cu create mode 100644 cuda/spgpu/kernels/hdia_zspmv.cu create mode 100644 cuda/spgpu/kernels/hell_cspmv.cu create mode 100644 cuda/spgpu/kernels/hell_dspmv.cu create mode 100644 cuda/spgpu/kernels/hell_spmv_base.cuh create mode 100644 cuda/spgpu/kernels/hell_spmv_base_template.cuh create mode 100644 cuda/spgpu/kernels/hell_sspmv.cu create mode 100644 cuda/spgpu/kernels/hell_zspmv.cu create mode 100644 cuda/spgpu/kernels/igath.cu create mode 100644 cuda/spgpu/kernels/iscat.cu create mode 100644 cuda/spgpu/kernels/isetscal.cu create mode 100644 cuda/spgpu/kernels/mathbase.cuh create mode 100644 cuda/spgpu/kernels/sabs.cu create mode 100644 cuda/spgpu/kernels/samax.cu create mode 100644 cuda/spgpu/kernels/sasum.cu create mode 100644 cuda/spgpu/kernels/saxpby.cu create mode 100644 cuda/spgpu/kernels/saxy.cu create mode 100644 cuda/spgpu/kernels/scal_base.cuh create mode 100644 cuda/spgpu/kernels/scat_base.cuh create mode 100644 cuda/spgpu/kernels/sdot.cu create mode 100644 cuda/spgpu/kernels/setscal_base.cuh create mode 100644 cuda/spgpu/kernels/sgath.cu create mode 100644 cuda/spgpu/kernels/snrm2.cu create mode 100644 cuda/spgpu/kernels/sscal.cu create mode 100644 cuda/spgpu/kernels/sscat.cu create mode 100644 cuda/spgpu/kernels/ssetscal.cu create mode 100644 cuda/spgpu/kernels/zabs.cu create mode 100644 cuda/spgpu/kernels/zamax.cu create mode 100644 cuda/spgpu/kernels/zasum.cu create mode 100644 cuda/spgpu/kernels/zaxpby.cu create mode 100644 cuda/spgpu/kernels/zaxy.cu create mode 100644 cuda/spgpu/kernels/zdot.cu create mode 100644 cuda/spgpu/kernels/zgath.cu create mode 100644 cuda/spgpu/kernels/znrm2.cu create mode 100644 cuda/spgpu/kernels/zscal.cu create mode 100644 cuda/spgpu/kernels/zscat.cu create mode 100644 cuda/spgpu/kernels/zsetscal.cu create mode 100644 cuda/spgpu/vector.h rename {gpu => cuda}/svectordev.c (100%) rename {gpu => cuda}/svectordev.h (100%) rename {gpu => cuda}/vectordev.c (100%) rename {gpu => cuda}/vectordev.h (100%) rename {gpu => cuda}/z_cusparse_mod.F90 (100%) rename {gpu => cuda}/zcusparse.c (100%) rename {gpu => cuda}/zvectordev.c (100%) rename {gpu => cuda}/zvectordev.h (100%) create mode 100755 test/gpukern/Makefile create mode 100644 test/gpukern/c_file_spmv.F90 create mode 100644 test/gpukern/d_file_spmv.F90 create mode 100644 test/gpukern/data_input.f90 create mode 100644 test/gpukern/dpdegenmv.F90 create mode 100644 test/gpukern/s_file_spmv.F90 create mode 100644 test/gpukern/spdegenmv.F90 create mode 100644 test/gpukern/z_file_spmv.F90 diff --git a/Make.inc.in b/Make.inc.in index f0b21f91..ca0fa7f7 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -67,15 +67,15 @@ UTILMODNAME=@UTILMODNAME@ CBINDLIBNAME=libpsb_cbind.a -GPUD=@GPUD@ -GPULD=@GPULD@ -LGPU=@LGPU@ +CUDAD=@CUDAD@ +CUDALD=@CUDALD@ +LCUDA=@LCUDA@ -SPGPUDIR=@SPGPU_DIR@ -SPGPU_INCDIR=@SPGPU_INCDIR@ +#SPGPUDIR=@SPGPU_DIR@ +#SPGPU_INCDIR=@SPGPU_INCDIR@ SPGPU_LIBS=@SPGPU_LIBS@ SPGPU_DEFINES=@SPGPU_DEFINES@ -SPGPU_INCLUDES=@SPGPU_INCLUDES@ +#SPGPU_INCLUDES=@SPGPU_INCLUDES@ CUDA_DIR=@CUDA_DIR@ CUDA_DEFINES=@CUDA_DEFINES@ @@ -93,4 +93,4 @@ CUDEFINES=@CUDEFINES@ @PSBLASRULES@ -PSBGPULDLIBS=$(LGPU) $(SPGPU_LIBS) $(CUDA_LIBS) $(LIBS) \ No newline at end of file +PSBGPULDLIBS=$(LCUDA) $(SPGPU_LIBS) $(CUDA_LIBS) $(PSBLDLIBS) $(LIBS) \ No newline at end of file diff --git a/Makefile b/Makefile index 49879270..95f4cb17 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include Make.inc -all: dirs based precd kryld utild cbindd extd $(GPUD) libd +all: dirs based precd kryld utild cbindd extd $(CUDAD) libd @echo "=====================================" @echo "PSBLAS libraries Compilation Successful." @@ -13,18 +13,18 @@ precd: based utild: based kryld: precd extd: based -gpud: extd +cudad: extd cbindd: based precd kryld utild -libd: based precd kryld utild cbindd extd $(GPULD) +libd: based precd kryld utild cbindd extd $(CUDALD) $(MAKE) -C base lib $(MAKE) -C prec lib $(MAKE) -C krylov lib $(MAKE) -C util lib $(MAKE) -C cbind lib $(MAKE) -C ext lib -gpuld: gpud - $(MAKE) -C gpu lib +cudald: cudad + $(MAKE) -C cuda lib based: @@ -39,8 +39,8 @@ cbindd: $(MAKE) -C cbind objs extd: based $(MAKE) -C ext objs -gpud: based extd - $(MAKE) -C gpu objs +cudad: based extd + $(MAKE) -C cuda objs install: all @@ -66,7 +66,7 @@ clean: $(MAKE) -C util clean $(MAKE) -C cbind clean $(MAKE) -C ext clean - $(MAKE) -C gpu clean + $(MAKE) -C cuda clean check: all make check -C test/serial @@ -83,7 +83,7 @@ veryclean: cleanlib cd util && $(MAKE) veryclean cd cbind && $(MAKE) veryclean cd ext && $(MAKE) veryclean - cd gpu && $(MAKE) veryclean + cd cuda && $(MAKE) veryclean cd test/fileread && $(MAKE) clean cd test/pargen && $(MAKE) clean cd test/util && $(MAKE) clean diff --git a/configure b/configure index d2fa80d2..8b217231 100755 --- a/configure +++ b/configure @@ -653,9 +653,9 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS -LGPU -GPULD -GPUD +LCUDA +CUDALD +CUDAD CUDEFINES CUDA_NVCC CUDA_SHORT_VERSION @@ -665,10 +665,7 @@ CUDA_INCLUDES CUDA_DEFINES CUDA_DIR EXTRALDLIBS -SPGPU_INCDIR -SPGPU_INCLUDES SPGPU_DEFINES -SPGPU_DIR SPGPU_LIBS SPGPU_FLAGS METISINCFILE @@ -844,7 +841,6 @@ with_amddir with_amdincdir with_amdlibdir with_cuda -with_spgpu with_cudacc ' ac_precious_vars='build_alias @@ -1545,8 +1541,6 @@ Optional Packages: --with-amdincdir=DIR Specify the directory for AMD includes. --with-amdlibdir=DIR Specify the directory for AMD library. --with-cuda=DIR Specify the directory for CUDA library and includes. - --with-spgpu=DIR Specify the directory for SPGPU library and - includes. --with-cudacc A comma-separated list of CCs to compile to, for example, --with-cudacc=30,35,37,50,60 @@ -10851,181 +10845,16 @@ CPPFLAGS="$SAVE_CPPFLAGS" CUDA_VERSION="$pac_cv_cuda_version"; CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); -SAVE_LIBS="$LIBS" - SAVE_CPPFLAGS="$CPPFLAGS" - if test "x$pac_cv_have_cuda" == "x"; then - -# Check whether --with-cuda was given. -if test ${with_cuda+y} -then : - withval=$with_cuda; pac_cv_cuda_dir=$withval -else $as_nop - pac_cv_cuda_dir='' -fi - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -SAVE_LIBS="$LIBS" -SAVE_CPPFLAGS="$CPPFLAGS" -if test "x$pac_cv_cuda_dir" != "x"; then - CUDA_DIR="$pac_cv_cuda_dir" - LIBS="-L$pac_cv_cuda_dir/lib $LIBS" - CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" - CUDA_DEFINES="-DHAVE_CUDA" - CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" - CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" - if test -f "$pac_cv_cuda_dir/bin/nvcc"; then - CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" - else - CUDA_NVCC="nvcc" - fi -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 -printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } -ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" -if test "x$ac_cv_header_cuda_runtime_h" = xyes -then : - pac_cuda_header_ok=yes -else $as_nop - pac_cuda_header_ok=no; CUDA_INCLUDES="" -fi - - -if test "x$pac_cuda_header_ok" == "xyes" ; then - CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" - LIBS="$CUDA_LIBS -lm $LIBS"; - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 -printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char cudaMemcpy (); -int -main (void) -{ -return cudaMemcpy (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; -else $as_nop - pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 -printf "%s\n" "$pac_cuda_lib_ok" >&6; } - -fi -LIBS="$SAVE_LIBS" -CPPFLAGS="$SAVE_CPPFLAGS" - - fi - if test "x$pac_cv_have_cuda" == "xyes"; then - -# Check whether --with-spgpu was given. -if test ${with_spgpu+y} -then : - withval=$with_spgpu; pac_cv_spgpudir=$withval -else $as_nop - pac_cv_spgpudir='' -fi - - - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - if test "x$pac_cv_spgpudir" != "x"; then - LIBS="-L$pac_cv_spgpudir/lib $LIBS" - GPU_INCLUDES="-I$pac_cv_spgpudir/include" - CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" - GPU_LIBDIR="-L$pac_cv_spgpudir/lib" - fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking spgpu dir $pac_cv_spgpudir" >&5 -printf %s "checking spgpu dir $pac_cv_spgpudir... " >&6; } - ac_fn_c_check_header_compile "$LINENO" "core.h" "ac_cv_header_core_h" "$ac_includes_default" -if test "x$ac_cv_header_core_h" = xyes -then : - pac_gpu_header_ok=yes -else $as_nop - pac_gpu_header_ok=no; GPU_INCLUDES="" -fi - - - if test "x$pac_gpu_header_ok" == "xyes" ; then - GPU_LIBS="-lspgpu $GPU_LIBDIR" - LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for spgpuCreate in $GPU_LIBS" >&5 -printf %s "checking for spgpuCreate in $GPU_LIBS... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char spgpuCreate (); -int -main (void) -{ -return spgpuCreate (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; -else $as_nop - pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS="" -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_gpu_lib_ok" >&5 -printf "%s\n" "$pac_gpu_lib_ok" >&6; } - if test "x$pac_cv_have_spgpu" == "xyes" ; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Have found SPGPU" >&5 -printf "%s\n" "$as_me: Have found SPGPU" >&6;} - SPGPULIBNAME="libpsbgpu.a"; - SPGPU_DIR="$pac_cv_spgpudir"; - SPGPU_DEFINES="-DHAVE_SPGPU"; - SPGPU_INCDIR="$SPGPU_DIR/include"; - SPGPU_INCLUDES="-I$SPGPU_INCDIR"; - SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib"; - LGPU=-lpsb_gpu - CUDA_DIR="$pac_cv_cuda_dir"; - CUDA_DEFINES="-DHAVE_CUDA"; - CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" - CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" - FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES"; - CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ; - fi - fi -fi -LIBS="$SAVE_LIBS" -CPPFLAGS="$SAVE_CPPFLAGS" - -if test "x$pac_cv_have_spgpu" == "xyes" ; then - GPUD=gpud; - GPULD=gpuld; - LGPU="-lpsb_gpu"; +if test "x$pac_cv_have_cuda" == "xyes" ; then + SPGPU_DEFINES="-DHAVE_SPGPU -DHAVE_GPU"; + SPGPU_LIBS="-lspgpu"; + CUDAD=cudad; + CUDALD=cudald; + LCUDA="-lpsb_cuda"; EXTRALDLIBS="-lstdc++"; fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $GPUD $GPULD" >&5 -printf "%s\n" "$as_me: At this point GPUTARGET is $GPUD $GPULD" >&6;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $CUDAD $CUDALD" >&5 +printf "%s\n" "$as_me: At this point GPUTARGET is $CUDAD $CUDALD" >&6;} @@ -11041,7 +10870,7 @@ fi if test "x$pac_cv_cudacc" == "x"; then pac_cv_cudacc="30,35,37,50,60"; fi -CUDEFINES=""; +CUDEFINES="--dopt=on"; for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` do CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; @@ -11057,8 +10886,10 @@ fi if test "x$pac_cv_ipk_size" != "x4"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5 printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;} - GPUD=""; - GPULD=""; + SPGPU_DEFINES=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; CUDEFINES=""; CUDA_INCLUDES=""; CUDA_LIBS=""; @@ -11124,9 +10955,9 @@ UTILLIBNAME=libpsb_util.a PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) -CDEFINES=$(PSBCDEFINES) -FDEFINES=$(PSBFDEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -11159,9 +10990,6 @@ FDEFINES=$(PSBFDEFINES) - - - diff --git a/configure.ac b/configure.ac index cb23c6ac..bfc9b5f9 100755 --- a/configure.ac +++ b/configure.ac @@ -798,20 +798,22 @@ if test "x$pac_cv_have_cuda" == "xyes"; then PAC_CHECK_CUDA_VERSION() CUDA_VERSION="$pac_cv_cuda_version"; CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); -PAC_CHECK_SPGPU() -if test "x$pac_cv_have_spgpu" == "xyes" ; then - GPUD=gpud; - GPULD=gpuld; - LGPU="-lpsb_gpu"; +dnl PAC_CHECK_SPGPU() +if test "x$pac_cv_have_cuda" == "xyes" ; then + SPGPU_DEFINES="-DHAVE_SPGPU -DHAVE_GPU"; + SPGPU_LIBS="-lspgpu"; + CUDAD=cudad; + CUDALD=cudald; + LCUDA="-lpsb_cuda"; EXTRALDLIBS="-lstdc++"; fi -AC_MSG_NOTICE([At this point GPUTARGET is $GPUD $GPULD]) +AC_MSG_NOTICE([At this point GPUTARGET is $CUDAD $CUDALD]) PAC_ARG_WITH_CUDACC() if test "x$pac_cv_cudacc" == "x"; then pac_cv_cudacc="30,35,37,50,60"; fi -CUDEFINES=""; +CUDEFINES="--dopt=on"; for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` do CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; @@ -826,8 +828,10 @@ fi if test "x$pac_cv_ipk_size" != "x4"; then AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU]) - GPUD=""; - GPULD=""; + SPGPU_DEFINES=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; CUDEFINES=""; CUDA_INCLUDES=""; CUDA_LIBS=""; @@ -893,9 +897,9 @@ AC_SUBST(FINCLUDES) PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) -CDEFINES=$(PSBCDEFINES) -FDEFINES=$(PSBFDEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -919,10 +923,10 @@ AC_SUBST(UTILLIBNAME) AC_SUBST(METISINCFILE) AC_SUBST(SPGPU_FLAGS) AC_SUBST(SPGPU_LIBS) -AC_SUBST(SPGPU_DIR) +dnl AC_SUBST(SPGPU_DIR) AC_SUBST(SPGPU_DEFINES) -AC_SUBST(SPGPU_INCLUDES) -AC_SUBST(SPGPU_INCDIR) +dnl AC_SUBST(SPGPU_INCLUDES) +dnl AC_SUBST(SPGPU_INCDIR) AC_SUBST(EXTRALDLIBS) AC_SUBST(CUDA_DIR) AC_SUBST(CUDA_DEFINES) @@ -932,9 +936,9 @@ AC_SUBST(CUDA_VERSION) AC_SUBST(CUDA_SHORT_VERSION) AC_SUBST(CUDA_NVCC) AC_SUBST(CUDEFINES) -AC_SUBST(GPUD) -AC_SUBST(GPULD) -AC_SUBST(LGPU) +AC_SUBST(CUDAD) +AC_SUBST(CUDALD) +AC_SUBST(LCUDA) ############################################################################### # the following files will be created by Automake diff --git a/gpu/CUDA/Makefile b/cuda/CUDA/Makefile similarity index 93% rename from gpu/CUDA/Makefile rename to cuda/CUDA/Makefile index a1f9d48b..5bdfb935 100644 --- a/gpu/CUDA/Makefile +++ b/cuda/CUDA/Makefile @@ -15,11 +15,10 @@ LDLIBS=$(PSBLDLIBS) # #CCOPT= -g FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). -CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. -I$(INCDIR) LIBNAME=libpsb_gpu.a - CUDAOBJS=psi_cuda_c_CopyCooToElg.o psi_cuda_c_CopyCooToHlg.o \ psi_cuda_d_CopyCooToElg.o psi_cuda_d_CopyCooToHlg.o \ psi_cuda_s_CopyCooToElg.o psi_cuda_s_CopyCooToHlg.o \ diff --git a/gpu/CUDA/psi_cuda_CopyCooToElg.cuh b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh similarity index 100% rename from gpu/CUDA/psi_cuda_CopyCooToElg.cuh rename to cuda/CUDA/psi_cuda_CopyCooToElg.cuh diff --git a/gpu/CUDA/psi_cuda_CopyCooToHlg.cuh b/cuda/CUDA/psi_cuda_CopyCooToHlg.cuh similarity index 100% rename from gpu/CUDA/psi_cuda_CopyCooToHlg.cuh rename to cuda/CUDA/psi_cuda_CopyCooToHlg.cuh diff --git a/gpu/CUDA/psi_cuda_c_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_c_CopyCooToElg.cu rename to cuda/CUDA/psi_cuda_c_CopyCooToElg.cu diff --git a/gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu rename to cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu diff --git a/gpu/CUDA/psi_cuda_common.cuh b/cuda/CUDA/psi_cuda_common.cuh similarity index 100% rename from gpu/CUDA/psi_cuda_common.cuh rename to cuda/CUDA/psi_cuda_common.cuh diff --git a/gpu/CUDA/psi_cuda_d_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_d_CopyCooToElg.cu rename to cuda/CUDA/psi_cuda_d_CopyCooToElg.cu diff --git a/gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu rename to cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu diff --git a/gpu/CUDA/psi_cuda_s_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_s_CopyCooToElg.cu rename to cuda/CUDA/psi_cuda_s_CopyCooToElg.cu diff --git a/gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu rename to cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu diff --git a/gpu/CUDA/psi_cuda_z_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_z_CopyCooToElg.cu rename to cuda/CUDA/psi_cuda_z_CopyCooToElg.cu diff --git a/gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu rename to cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu diff --git a/gpu/Makefile b/cuda/Makefile similarity index 93% rename from gpu/Makefile rename to cuda/Makefile index 10e17d55..2b0c011a 100755 --- a/gpu/Makefile +++ b/cuda/Makefile @@ -13,8 +13,8 @@ LDLIBS=$(PSBLDLIBS) # #CCOPT= -g FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). -CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -LIBNAME=libpsb_gpu.a +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I$(INCDIR) +LIBNAME=libpsb_cuda.a FOBJS=cusparse_mod.o base_cusparse_mod.o \ @@ -49,11 +49,18 @@ OBJS=$(COBJS) $(FOBJS) lib: objs -objs: $(OBJS) iobjs cudaobjs +objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs /bin/cp -p *$(.mod) $(MODDIR) /bin/cp -p *.h $(INCDIR) -lib: ilib cudalib +spgpuinc: + $(MAKE) -C spgpu includes +spgpuobjs: + $(MAKE) -C spgpu objs +spgpulib: + $(MAKE) -C spgpu lib + +lib: ilib cudalib spgpulib ar cur $(LIBNAME) $(OBJS) /bin/cp -p $(LIBNAME) $(LIBDIR) @@ -121,7 +128,7 @@ ilib: objs cudalib: objs ilib $(MAKE) -C CUDA lib LIBNAME=$(LIBNAME) -clean: cclean iclean cudaclean +clean: cclean iclean cudaclean spgpuclean /bin/rm -f $(FOBJS) *$(.mod) *.a cclean: @@ -130,5 +137,7 @@ iclean: $(MAKE) -C impl clean cudaclean: $(MAKE) -C CUDA clean +spgpuclean: + $(MAKE) -C spgpu clean veryclean: clean diff --git a/gpu/base_cusparse_mod.F90 b/cuda/base_cusparse_mod.F90 similarity index 100% rename from gpu/base_cusparse_mod.F90 rename to cuda/base_cusparse_mod.F90 diff --git a/gpu/c_cusparse_mod.F90 b/cuda/c_cusparse_mod.F90 similarity index 100% rename from gpu/c_cusparse_mod.F90 rename to cuda/c_cusparse_mod.F90 diff --git a/gpu/ccusparse.c b/cuda/ccusparse.c similarity index 100% rename from gpu/ccusparse.c rename to cuda/ccusparse.c diff --git a/gpu/cintrf.h b/cuda/cintrf.h similarity index 100% rename from gpu/cintrf.h rename to cuda/cintrf.h diff --git a/gpu/core_mod.f90 b/cuda/core_mod.f90 similarity index 100% rename from gpu/core_mod.f90 rename to cuda/core_mod.f90 diff --git a/gpu/cuda_util.c b/cuda/cuda_util.c similarity index 100% rename from gpu/cuda_util.c rename to cuda/cuda_util.c diff --git a/gpu/cuda_util.h b/cuda/cuda_util.h similarity index 100% rename from gpu/cuda_util.h rename to cuda/cuda_util.h diff --git a/gpu/cusparse_mod.F90 b/cuda/cusparse_mod.F90 similarity index 100% rename from gpu/cusparse_mod.F90 rename to cuda/cusparse_mod.F90 diff --git a/gpu/cvectordev.c b/cuda/cvectordev.c similarity index 100% rename from gpu/cvectordev.c rename to cuda/cvectordev.c diff --git a/gpu/cvectordev.h b/cuda/cvectordev.h similarity index 100% rename from gpu/cvectordev.h rename to cuda/cvectordev.h diff --git a/gpu/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 similarity index 100% rename from gpu/d_cusparse_mod.F90 rename to cuda/d_cusparse_mod.F90 diff --git a/gpu/dcusparse.c b/cuda/dcusparse.c similarity index 100% rename from gpu/dcusparse.c rename to cuda/dcusparse.c diff --git a/gpu/diagdev.c b/cuda/diagdev.c similarity index 100% rename from gpu/diagdev.c rename to cuda/diagdev.c diff --git a/gpu/diagdev.h b/cuda/diagdev.h similarity index 100% rename from gpu/diagdev.h rename to cuda/diagdev.h diff --git a/gpu/diagdev_mod.F90 b/cuda/diagdev_mod.F90 similarity index 100% rename from gpu/diagdev_mod.F90 rename to cuda/diagdev_mod.F90 diff --git a/gpu/dnsdev.c b/cuda/dnsdev.c similarity index 100% rename from gpu/dnsdev.c rename to cuda/dnsdev.c diff --git a/gpu/dnsdev.h b/cuda/dnsdev.h similarity index 100% rename from gpu/dnsdev.h rename to cuda/dnsdev.h diff --git a/gpu/dnsdev_mod.F90 b/cuda/dnsdev_mod.F90 similarity index 100% rename from gpu/dnsdev_mod.F90 rename to cuda/dnsdev_mod.F90 diff --git a/gpu/dvectordev.c b/cuda/dvectordev.c similarity index 100% rename from gpu/dvectordev.c rename to cuda/dvectordev.c diff --git a/gpu/dvectordev.h b/cuda/dvectordev.h similarity index 100% rename from gpu/dvectordev.h rename to cuda/dvectordev.h diff --git a/gpu/elldev.c b/cuda/elldev.c similarity index 100% rename from gpu/elldev.c rename to cuda/elldev.c diff --git a/gpu/elldev.h b/cuda/elldev.h similarity index 100% rename from gpu/elldev.h rename to cuda/elldev.h diff --git a/gpu/elldev_mod.F90 b/cuda/elldev_mod.F90 similarity index 100% rename from gpu/elldev_mod.F90 rename to cuda/elldev_mod.F90 diff --git a/gpu/fcusparse.c b/cuda/fcusparse.c similarity index 100% rename from gpu/fcusparse.c rename to cuda/fcusparse.c diff --git a/gpu/fcusparse.h b/cuda/fcusparse.h similarity index 100% rename from gpu/fcusparse.h rename to cuda/fcusparse.h diff --git a/gpu/fcusparse_fct.h b/cuda/fcusparse_fct.h similarity index 100% rename from gpu/fcusparse_fct.h rename to cuda/fcusparse_fct.h diff --git a/gpu/hdiagdev.c b/cuda/hdiagdev.c similarity index 100% rename from gpu/hdiagdev.c rename to cuda/hdiagdev.c diff --git a/gpu/hdiagdev.h b/cuda/hdiagdev.h similarity index 100% rename from gpu/hdiagdev.h rename to cuda/hdiagdev.h diff --git a/gpu/hdiagdev_mod.F90 b/cuda/hdiagdev_mod.F90 similarity index 100% rename from gpu/hdiagdev_mod.F90 rename to cuda/hdiagdev_mod.F90 diff --git a/gpu/hlldev.c b/cuda/hlldev.c similarity index 100% rename from gpu/hlldev.c rename to cuda/hlldev.c diff --git a/gpu/hlldev.h b/cuda/hlldev.h similarity index 100% rename from gpu/hlldev.h rename to cuda/hlldev.h diff --git a/gpu/hlldev_mod.F90 b/cuda/hlldev_mod.F90 similarity index 100% rename from gpu/hlldev_mod.F90 rename to cuda/hlldev_mod.F90 diff --git a/gpu/impl/Makefile b/cuda/impl/Makefile similarity index 97% rename from gpu/impl/Makefile rename to cuda/impl/Makefile index 158066f2..6ddac9a7 100755 --- a/gpu/impl/Makefile +++ b/cuda/impl/Makefile @@ -12,6 +12,9 @@ LDLIBS=$(PSBLDLIBS) FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR) LIBNAME=libpsb_gpu.a +CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) OBJS= \ psb_d_cp_csrg_from_coo.o \ diff --git a/gpu/impl/psb_c_cp_csrg_from_coo.F90 b/cuda/impl/psb_c_cp_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_csrg_from_coo.F90 rename to cuda/impl/psb_c_cp_csrg_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cp_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_c_cp_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_c_cp_diag_from_coo.F90 b/cuda/impl/psb_c_cp_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_diag_from_coo.F90 rename to cuda/impl/psb_c_cp_diag_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_elg_from_coo.F90 b/cuda/impl/psb_c_cp_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_elg_from_coo.F90 rename to cuda/impl/psb_c_cp_elg_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_elg_from_fmt.F90 b/cuda/impl/psb_c_cp_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_cp_elg_from_fmt.F90 rename to cuda/impl/psb_c_cp_elg_from_fmt.F90 diff --git a/gpu/impl/psb_c_cp_hdiag_from_coo.F90 b/cuda/impl/psb_c_cp_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_c_cp_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_hlg_from_coo.F90 b/cuda/impl/psb_c_cp_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hlg_from_coo.F90 rename to cuda/impl/psb_c_cp_hlg_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_hlg_from_fmt.F90 b/cuda/impl/psb_c_cp_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_c_cp_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_c_cp_hybg_from_coo.F90 b/cuda/impl/psb_c_cp_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hybg_from_coo.F90 rename to cuda/impl/psb_c_cp_hybg_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_hybg_from_fmt.F90 b/cuda/impl/psb_c_cp_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_c_cp_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_c_csrg_allocate_mnnz.F90 b/cuda/impl/psb_c_csrg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_c_csrg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_c_csrg_csmm.F90 b/cuda/impl/psb_c_csrg_csmm.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_csmm.F90 rename to cuda/impl/psb_c_csrg_csmm.F90 diff --git a/gpu/impl/psb_c_csrg_csmv.F90 b/cuda/impl/psb_c_csrg_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_csmv.F90 rename to cuda/impl/psb_c_csrg_csmv.F90 diff --git a/gpu/impl/psb_c_csrg_from_gpu.F90 b/cuda/impl/psb_c_csrg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_from_gpu.F90 rename to cuda/impl/psb_c_csrg_from_gpu.F90 diff --git a/gpu/impl/psb_c_csrg_inner_vect_sv.F90 b/cuda/impl/psb_c_csrg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_c_csrg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_c_csrg_mold.F90 b/cuda/impl/psb_c_csrg_mold.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_mold.F90 rename to cuda/impl/psb_c_csrg_mold.F90 diff --git a/gpu/impl/psb_c_csrg_reallocate_nz.F90 b/cuda/impl/psb_c_csrg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_reallocate_nz.F90 rename to cuda/impl/psb_c_csrg_reallocate_nz.F90 diff --git a/gpu/impl/psb_c_csrg_scal.F90 b/cuda/impl/psb_c_csrg_scal.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_scal.F90 rename to cuda/impl/psb_c_csrg_scal.F90 diff --git a/gpu/impl/psb_c_csrg_scals.F90 b/cuda/impl/psb_c_csrg_scals.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_scals.F90 rename to cuda/impl/psb_c_csrg_scals.F90 diff --git a/gpu/impl/psb_c_csrg_to_gpu.F90 b/cuda/impl/psb_c_csrg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_to_gpu.F90 rename to cuda/impl/psb_c_csrg_to_gpu.F90 diff --git a/gpu/impl/psb_c_csrg_vect_mv.F90 b/cuda/impl/psb_c_csrg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_vect_mv.F90 rename to cuda/impl/psb_c_csrg_vect_mv.F90 diff --git a/gpu/impl/psb_c_diag_csmv.F90 b/cuda/impl/psb_c_diag_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_diag_csmv.F90 rename to cuda/impl/psb_c_diag_csmv.F90 diff --git a/gpu/impl/psb_c_diag_mold.F90 b/cuda/impl/psb_c_diag_mold.F90 similarity index 100% rename from gpu/impl/psb_c_diag_mold.F90 rename to cuda/impl/psb_c_diag_mold.F90 diff --git a/gpu/impl/psb_c_diag_to_gpu.F90 b/cuda/impl/psb_c_diag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_diag_to_gpu.F90 rename to cuda/impl/psb_c_diag_to_gpu.F90 diff --git a/gpu/impl/psb_c_diag_vect_mv.F90 b/cuda/impl/psb_c_diag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_diag_vect_mv.F90 rename to cuda/impl/psb_c_diag_vect_mv.F90 diff --git a/gpu/impl/psb_c_dnsg_mat_impl.F90 b/cuda/impl/psb_c_dnsg_mat_impl.F90 similarity index 100% rename from gpu/impl/psb_c_dnsg_mat_impl.F90 rename to cuda/impl/psb_c_dnsg_mat_impl.F90 diff --git a/gpu/impl/psb_c_elg_allocate_mnnz.F90 b/cuda/impl/psb_c_elg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_c_elg_allocate_mnnz.F90 rename to cuda/impl/psb_c_elg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_c_elg_asb.f90 b/cuda/impl/psb_c_elg_asb.f90 similarity index 100% rename from gpu/impl/psb_c_elg_asb.f90 rename to cuda/impl/psb_c_elg_asb.f90 diff --git a/gpu/impl/psb_c_elg_csmm.F90 b/cuda/impl/psb_c_elg_csmm.F90 similarity index 100% rename from gpu/impl/psb_c_elg_csmm.F90 rename to cuda/impl/psb_c_elg_csmm.F90 diff --git a/gpu/impl/psb_c_elg_csmv.F90 b/cuda/impl/psb_c_elg_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_elg_csmv.F90 rename to cuda/impl/psb_c_elg_csmv.F90 diff --git a/gpu/impl/psb_c_elg_csput.F90 b/cuda/impl/psb_c_elg_csput.F90 similarity index 100% rename from gpu/impl/psb_c_elg_csput.F90 rename to cuda/impl/psb_c_elg_csput.F90 diff --git a/gpu/impl/psb_c_elg_from_gpu.F90 b/cuda/impl/psb_c_elg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_elg_from_gpu.F90 rename to cuda/impl/psb_c_elg_from_gpu.F90 diff --git a/gpu/impl/psb_c_elg_inner_vect_sv.F90 b/cuda/impl/psb_c_elg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_c_elg_inner_vect_sv.F90 rename to cuda/impl/psb_c_elg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_c_elg_mold.F90 b/cuda/impl/psb_c_elg_mold.F90 similarity index 100% rename from gpu/impl/psb_c_elg_mold.F90 rename to cuda/impl/psb_c_elg_mold.F90 diff --git a/gpu/impl/psb_c_elg_reallocate_nz.F90 b/cuda/impl/psb_c_elg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_c_elg_reallocate_nz.F90 rename to cuda/impl/psb_c_elg_reallocate_nz.F90 diff --git a/gpu/impl/psb_c_elg_scal.F90 b/cuda/impl/psb_c_elg_scal.F90 similarity index 100% rename from gpu/impl/psb_c_elg_scal.F90 rename to cuda/impl/psb_c_elg_scal.F90 diff --git a/gpu/impl/psb_c_elg_scals.F90 b/cuda/impl/psb_c_elg_scals.F90 similarity index 100% rename from gpu/impl/psb_c_elg_scals.F90 rename to cuda/impl/psb_c_elg_scals.F90 diff --git a/gpu/impl/psb_c_elg_to_gpu.F90 b/cuda/impl/psb_c_elg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_elg_to_gpu.F90 rename to cuda/impl/psb_c_elg_to_gpu.F90 diff --git a/gpu/impl/psb_c_elg_trim.f90 b/cuda/impl/psb_c_elg_trim.f90 similarity index 100% rename from gpu/impl/psb_c_elg_trim.f90 rename to cuda/impl/psb_c_elg_trim.f90 diff --git a/gpu/impl/psb_c_elg_vect_mv.F90 b/cuda/impl/psb_c_elg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_elg_vect_mv.F90 rename to cuda/impl/psb_c_elg_vect_mv.F90 diff --git a/gpu/impl/psb_c_hdiag_csmv.F90 b/cuda/impl/psb_c_hdiag_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_hdiag_csmv.F90 rename to cuda/impl/psb_c_hdiag_csmv.F90 diff --git a/gpu/impl/psb_c_hdiag_mold.F90 b/cuda/impl/psb_c_hdiag_mold.F90 similarity index 100% rename from gpu/impl/psb_c_hdiag_mold.F90 rename to cuda/impl/psb_c_hdiag_mold.F90 diff --git a/gpu/impl/psb_c_hdiag_to_gpu.F90 b/cuda/impl/psb_c_hdiag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_hdiag_to_gpu.F90 rename to cuda/impl/psb_c_hdiag_to_gpu.F90 diff --git a/gpu/impl/psb_c_hdiag_vect_mv.F90 b/cuda/impl/psb_c_hdiag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_hdiag_vect_mv.F90 rename to cuda/impl/psb_c_hdiag_vect_mv.F90 diff --git a/gpu/impl/psb_c_hlg_allocate_mnnz.F90 b/cuda/impl/psb_c_hlg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_c_hlg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_c_hlg_csmm.F90 b/cuda/impl/psb_c_hlg_csmm.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_csmm.F90 rename to cuda/impl/psb_c_hlg_csmm.F90 diff --git a/gpu/impl/psb_c_hlg_csmv.F90 b/cuda/impl/psb_c_hlg_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_csmv.F90 rename to cuda/impl/psb_c_hlg_csmv.F90 diff --git a/gpu/impl/psb_c_hlg_from_gpu.F90 b/cuda/impl/psb_c_hlg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_from_gpu.F90 rename to cuda/impl/psb_c_hlg_from_gpu.F90 diff --git a/gpu/impl/psb_c_hlg_inner_vect_sv.F90 b/cuda/impl/psb_c_hlg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_c_hlg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_c_hlg_mold.F90 b/cuda/impl/psb_c_hlg_mold.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_mold.F90 rename to cuda/impl/psb_c_hlg_mold.F90 diff --git a/gpu/impl/psb_c_hlg_reallocate_nz.F90 b/cuda/impl/psb_c_hlg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_reallocate_nz.F90 rename to cuda/impl/psb_c_hlg_reallocate_nz.F90 diff --git a/gpu/impl/psb_c_hlg_scal.F90 b/cuda/impl/psb_c_hlg_scal.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_scal.F90 rename to cuda/impl/psb_c_hlg_scal.F90 diff --git a/gpu/impl/psb_c_hlg_scals.F90 b/cuda/impl/psb_c_hlg_scals.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_scals.F90 rename to cuda/impl/psb_c_hlg_scals.F90 diff --git a/gpu/impl/psb_c_hlg_to_gpu.F90 b/cuda/impl/psb_c_hlg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_to_gpu.F90 rename to cuda/impl/psb_c_hlg_to_gpu.F90 diff --git a/gpu/impl/psb_c_hlg_vect_mv.F90 b/cuda/impl/psb_c_hlg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_vect_mv.F90 rename to cuda/impl/psb_c_hlg_vect_mv.F90 diff --git a/gpu/impl/psb_c_hybg_allocate_mnnz.F90 b/cuda/impl/psb_c_hybg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_c_hybg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_c_hybg_csmm.F90 b/cuda/impl/psb_c_hybg_csmm.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_csmm.F90 rename to cuda/impl/psb_c_hybg_csmm.F90 diff --git a/gpu/impl/psb_c_hybg_csmv.F90 b/cuda/impl/psb_c_hybg_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_csmv.F90 rename to cuda/impl/psb_c_hybg_csmv.F90 diff --git a/gpu/impl/psb_c_hybg_inner_vect_sv.F90 b/cuda/impl/psb_c_hybg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_c_hybg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_c_hybg_mold.F90 b/cuda/impl/psb_c_hybg_mold.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_mold.F90 rename to cuda/impl/psb_c_hybg_mold.F90 diff --git a/gpu/impl/psb_c_hybg_reallocate_nz.F90 b/cuda/impl/psb_c_hybg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_reallocate_nz.F90 rename to cuda/impl/psb_c_hybg_reallocate_nz.F90 diff --git a/gpu/impl/psb_c_hybg_scal.F90 b/cuda/impl/psb_c_hybg_scal.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_scal.F90 rename to cuda/impl/psb_c_hybg_scal.F90 diff --git a/gpu/impl/psb_c_hybg_scals.F90 b/cuda/impl/psb_c_hybg_scals.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_scals.F90 rename to cuda/impl/psb_c_hybg_scals.F90 diff --git a/gpu/impl/psb_c_hybg_to_gpu.F90 b/cuda/impl/psb_c_hybg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_to_gpu.F90 rename to cuda/impl/psb_c_hybg_to_gpu.F90 diff --git a/gpu/impl/psb_c_hybg_vect_mv.F90 b/cuda/impl/psb_c_hybg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_vect_mv.F90 rename to cuda/impl/psb_c_hybg_vect_mv.F90 diff --git a/gpu/impl/psb_c_mv_csrg_from_coo.F90 b/cuda/impl/psb_c_mv_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_csrg_from_coo.F90 rename to cuda/impl/psb_c_mv_csrg_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_csrg_from_fmt.F90 b/cuda/impl/psb_c_mv_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_c_mv_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_c_mv_diag_from_coo.F90 b/cuda/impl/psb_c_mv_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_diag_from_coo.F90 rename to cuda/impl/psb_c_mv_diag_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_elg_from_coo.F90 b/cuda/impl/psb_c_mv_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_elg_from_coo.F90 rename to cuda/impl/psb_c_mv_elg_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_elg_from_fmt.F90 b/cuda/impl/psb_c_mv_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_mv_elg_from_fmt.F90 rename to cuda/impl/psb_c_mv_elg_from_fmt.F90 diff --git a/gpu/impl/psb_c_mv_hdiag_from_coo.F90 b/cuda/impl/psb_c_mv_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_c_mv_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_hlg_from_coo.F90 b/cuda/impl/psb_c_mv_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hlg_from_coo.F90 rename to cuda/impl/psb_c_mv_hlg_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_hlg_from_fmt.F90 b/cuda/impl/psb_c_mv_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_c_mv_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_c_mv_hybg_from_coo.F90 b/cuda/impl/psb_c_mv_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hybg_from_coo.F90 rename to cuda/impl/psb_c_mv_hybg_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_hybg_from_fmt.F90 b/cuda/impl/psb_c_mv_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_c_mv_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_d_cp_csrg_from_coo.F90 b/cuda/impl/psb_d_cp_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_csrg_from_coo.F90 rename to cuda/impl/psb_d_cp_csrg_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cp_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_d_cp_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_d_cp_diag_from_coo.F90 b/cuda/impl/psb_d_cp_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_diag_from_coo.F90 rename to cuda/impl/psb_d_cp_diag_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_elg_from_coo.F90 b/cuda/impl/psb_d_cp_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_elg_from_coo.F90 rename to cuda/impl/psb_d_cp_elg_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_elg_from_fmt.F90 b/cuda/impl/psb_d_cp_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_cp_elg_from_fmt.F90 rename to cuda/impl/psb_d_cp_elg_from_fmt.F90 diff --git a/gpu/impl/psb_d_cp_hdiag_from_coo.F90 b/cuda/impl/psb_d_cp_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_d_cp_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_hlg_from_coo.F90 b/cuda/impl/psb_d_cp_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hlg_from_coo.F90 rename to cuda/impl/psb_d_cp_hlg_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_hlg_from_fmt.F90 b/cuda/impl/psb_d_cp_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_d_cp_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_d_cp_hybg_from_coo.F90 b/cuda/impl/psb_d_cp_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hybg_from_coo.F90 rename to cuda/impl/psb_d_cp_hybg_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_hybg_from_fmt.F90 b/cuda/impl/psb_d_cp_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_d_cp_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_d_csrg_allocate_mnnz.F90 b/cuda/impl/psb_d_csrg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_d_csrg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_d_csrg_csmm.F90 b/cuda/impl/psb_d_csrg_csmm.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_csmm.F90 rename to cuda/impl/psb_d_csrg_csmm.F90 diff --git a/gpu/impl/psb_d_csrg_csmv.F90 b/cuda/impl/psb_d_csrg_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_csmv.F90 rename to cuda/impl/psb_d_csrg_csmv.F90 diff --git a/gpu/impl/psb_d_csrg_from_gpu.F90 b/cuda/impl/psb_d_csrg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_from_gpu.F90 rename to cuda/impl/psb_d_csrg_from_gpu.F90 diff --git a/gpu/impl/psb_d_csrg_inner_vect_sv.F90 b/cuda/impl/psb_d_csrg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_d_csrg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_d_csrg_mold.F90 b/cuda/impl/psb_d_csrg_mold.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_mold.F90 rename to cuda/impl/psb_d_csrg_mold.F90 diff --git a/gpu/impl/psb_d_csrg_reallocate_nz.F90 b/cuda/impl/psb_d_csrg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_reallocate_nz.F90 rename to cuda/impl/psb_d_csrg_reallocate_nz.F90 diff --git a/gpu/impl/psb_d_csrg_scal.F90 b/cuda/impl/psb_d_csrg_scal.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_scal.F90 rename to cuda/impl/psb_d_csrg_scal.F90 diff --git a/gpu/impl/psb_d_csrg_scals.F90 b/cuda/impl/psb_d_csrg_scals.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_scals.F90 rename to cuda/impl/psb_d_csrg_scals.F90 diff --git a/gpu/impl/psb_d_csrg_to_gpu.F90 b/cuda/impl/psb_d_csrg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_to_gpu.F90 rename to cuda/impl/psb_d_csrg_to_gpu.F90 diff --git a/gpu/impl/psb_d_csrg_vect_mv.F90 b/cuda/impl/psb_d_csrg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_vect_mv.F90 rename to cuda/impl/psb_d_csrg_vect_mv.F90 diff --git a/gpu/impl/psb_d_diag_csmv.F90 b/cuda/impl/psb_d_diag_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_diag_csmv.F90 rename to cuda/impl/psb_d_diag_csmv.F90 diff --git a/gpu/impl/psb_d_diag_mold.F90 b/cuda/impl/psb_d_diag_mold.F90 similarity index 100% rename from gpu/impl/psb_d_diag_mold.F90 rename to cuda/impl/psb_d_diag_mold.F90 diff --git a/gpu/impl/psb_d_diag_to_gpu.F90 b/cuda/impl/psb_d_diag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_diag_to_gpu.F90 rename to cuda/impl/psb_d_diag_to_gpu.F90 diff --git a/gpu/impl/psb_d_diag_vect_mv.F90 b/cuda/impl/psb_d_diag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_diag_vect_mv.F90 rename to cuda/impl/psb_d_diag_vect_mv.F90 diff --git a/gpu/impl/psb_d_dnsg_mat_impl.F90 b/cuda/impl/psb_d_dnsg_mat_impl.F90 similarity index 100% rename from gpu/impl/psb_d_dnsg_mat_impl.F90 rename to cuda/impl/psb_d_dnsg_mat_impl.F90 diff --git a/gpu/impl/psb_d_elg_allocate_mnnz.F90 b/cuda/impl/psb_d_elg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_d_elg_allocate_mnnz.F90 rename to cuda/impl/psb_d_elg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_d_elg_asb.f90 b/cuda/impl/psb_d_elg_asb.f90 similarity index 100% rename from gpu/impl/psb_d_elg_asb.f90 rename to cuda/impl/psb_d_elg_asb.f90 diff --git a/gpu/impl/psb_d_elg_csmm.F90 b/cuda/impl/psb_d_elg_csmm.F90 similarity index 100% rename from gpu/impl/psb_d_elg_csmm.F90 rename to cuda/impl/psb_d_elg_csmm.F90 diff --git a/gpu/impl/psb_d_elg_csmv.F90 b/cuda/impl/psb_d_elg_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_elg_csmv.F90 rename to cuda/impl/psb_d_elg_csmv.F90 diff --git a/gpu/impl/psb_d_elg_csput.F90 b/cuda/impl/psb_d_elg_csput.F90 similarity index 100% rename from gpu/impl/psb_d_elg_csput.F90 rename to cuda/impl/psb_d_elg_csput.F90 diff --git a/gpu/impl/psb_d_elg_from_gpu.F90 b/cuda/impl/psb_d_elg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_elg_from_gpu.F90 rename to cuda/impl/psb_d_elg_from_gpu.F90 diff --git a/gpu/impl/psb_d_elg_inner_vect_sv.F90 b/cuda/impl/psb_d_elg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_d_elg_inner_vect_sv.F90 rename to cuda/impl/psb_d_elg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_d_elg_mold.F90 b/cuda/impl/psb_d_elg_mold.F90 similarity index 100% rename from gpu/impl/psb_d_elg_mold.F90 rename to cuda/impl/psb_d_elg_mold.F90 diff --git a/gpu/impl/psb_d_elg_reallocate_nz.F90 b/cuda/impl/psb_d_elg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_d_elg_reallocate_nz.F90 rename to cuda/impl/psb_d_elg_reallocate_nz.F90 diff --git a/gpu/impl/psb_d_elg_scal.F90 b/cuda/impl/psb_d_elg_scal.F90 similarity index 100% rename from gpu/impl/psb_d_elg_scal.F90 rename to cuda/impl/psb_d_elg_scal.F90 diff --git a/gpu/impl/psb_d_elg_scals.F90 b/cuda/impl/psb_d_elg_scals.F90 similarity index 100% rename from gpu/impl/psb_d_elg_scals.F90 rename to cuda/impl/psb_d_elg_scals.F90 diff --git a/gpu/impl/psb_d_elg_to_gpu.F90 b/cuda/impl/psb_d_elg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_elg_to_gpu.F90 rename to cuda/impl/psb_d_elg_to_gpu.F90 diff --git a/gpu/impl/psb_d_elg_trim.f90 b/cuda/impl/psb_d_elg_trim.f90 similarity index 100% rename from gpu/impl/psb_d_elg_trim.f90 rename to cuda/impl/psb_d_elg_trim.f90 diff --git a/gpu/impl/psb_d_elg_vect_mv.F90 b/cuda/impl/psb_d_elg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_elg_vect_mv.F90 rename to cuda/impl/psb_d_elg_vect_mv.F90 diff --git a/gpu/impl/psb_d_hdiag_csmv.F90 b/cuda/impl/psb_d_hdiag_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_hdiag_csmv.F90 rename to cuda/impl/psb_d_hdiag_csmv.F90 diff --git a/gpu/impl/psb_d_hdiag_mold.F90 b/cuda/impl/psb_d_hdiag_mold.F90 similarity index 100% rename from gpu/impl/psb_d_hdiag_mold.F90 rename to cuda/impl/psb_d_hdiag_mold.F90 diff --git a/gpu/impl/psb_d_hdiag_to_gpu.F90 b/cuda/impl/psb_d_hdiag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_hdiag_to_gpu.F90 rename to cuda/impl/psb_d_hdiag_to_gpu.F90 diff --git a/gpu/impl/psb_d_hdiag_vect_mv.F90 b/cuda/impl/psb_d_hdiag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_hdiag_vect_mv.F90 rename to cuda/impl/psb_d_hdiag_vect_mv.F90 diff --git a/gpu/impl/psb_d_hlg_allocate_mnnz.F90 b/cuda/impl/psb_d_hlg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_d_hlg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_d_hlg_csmm.F90 b/cuda/impl/psb_d_hlg_csmm.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_csmm.F90 rename to cuda/impl/psb_d_hlg_csmm.F90 diff --git a/gpu/impl/psb_d_hlg_csmv.F90 b/cuda/impl/psb_d_hlg_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_csmv.F90 rename to cuda/impl/psb_d_hlg_csmv.F90 diff --git a/gpu/impl/psb_d_hlg_from_gpu.F90 b/cuda/impl/psb_d_hlg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_from_gpu.F90 rename to cuda/impl/psb_d_hlg_from_gpu.F90 diff --git a/gpu/impl/psb_d_hlg_inner_vect_sv.F90 b/cuda/impl/psb_d_hlg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_d_hlg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_d_hlg_mold.F90 b/cuda/impl/psb_d_hlg_mold.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_mold.F90 rename to cuda/impl/psb_d_hlg_mold.F90 diff --git a/gpu/impl/psb_d_hlg_reallocate_nz.F90 b/cuda/impl/psb_d_hlg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_reallocate_nz.F90 rename to cuda/impl/psb_d_hlg_reallocate_nz.F90 diff --git a/gpu/impl/psb_d_hlg_scal.F90 b/cuda/impl/psb_d_hlg_scal.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_scal.F90 rename to cuda/impl/psb_d_hlg_scal.F90 diff --git a/gpu/impl/psb_d_hlg_scals.F90 b/cuda/impl/psb_d_hlg_scals.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_scals.F90 rename to cuda/impl/psb_d_hlg_scals.F90 diff --git a/gpu/impl/psb_d_hlg_to_gpu.F90 b/cuda/impl/psb_d_hlg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_to_gpu.F90 rename to cuda/impl/psb_d_hlg_to_gpu.F90 diff --git a/gpu/impl/psb_d_hlg_vect_mv.F90 b/cuda/impl/psb_d_hlg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_vect_mv.F90 rename to cuda/impl/psb_d_hlg_vect_mv.F90 diff --git a/gpu/impl/psb_d_hybg_allocate_mnnz.F90 b/cuda/impl/psb_d_hybg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_d_hybg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_d_hybg_csmm.F90 b/cuda/impl/psb_d_hybg_csmm.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_csmm.F90 rename to cuda/impl/psb_d_hybg_csmm.F90 diff --git a/gpu/impl/psb_d_hybg_csmv.F90 b/cuda/impl/psb_d_hybg_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_csmv.F90 rename to cuda/impl/psb_d_hybg_csmv.F90 diff --git a/gpu/impl/psb_d_hybg_inner_vect_sv.F90 b/cuda/impl/psb_d_hybg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_d_hybg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_d_hybg_mold.F90 b/cuda/impl/psb_d_hybg_mold.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_mold.F90 rename to cuda/impl/psb_d_hybg_mold.F90 diff --git a/gpu/impl/psb_d_hybg_reallocate_nz.F90 b/cuda/impl/psb_d_hybg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_reallocate_nz.F90 rename to cuda/impl/psb_d_hybg_reallocate_nz.F90 diff --git a/gpu/impl/psb_d_hybg_scal.F90 b/cuda/impl/psb_d_hybg_scal.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_scal.F90 rename to cuda/impl/psb_d_hybg_scal.F90 diff --git a/gpu/impl/psb_d_hybg_scals.F90 b/cuda/impl/psb_d_hybg_scals.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_scals.F90 rename to cuda/impl/psb_d_hybg_scals.F90 diff --git a/gpu/impl/psb_d_hybg_to_gpu.F90 b/cuda/impl/psb_d_hybg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_to_gpu.F90 rename to cuda/impl/psb_d_hybg_to_gpu.F90 diff --git a/gpu/impl/psb_d_hybg_vect_mv.F90 b/cuda/impl/psb_d_hybg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_vect_mv.F90 rename to cuda/impl/psb_d_hybg_vect_mv.F90 diff --git a/gpu/impl/psb_d_mv_csrg_from_coo.F90 b/cuda/impl/psb_d_mv_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_csrg_from_coo.F90 rename to cuda/impl/psb_d_mv_csrg_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_csrg_from_fmt.F90 b/cuda/impl/psb_d_mv_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_d_mv_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_d_mv_diag_from_coo.F90 b/cuda/impl/psb_d_mv_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_diag_from_coo.F90 rename to cuda/impl/psb_d_mv_diag_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_elg_from_coo.F90 b/cuda/impl/psb_d_mv_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_elg_from_coo.F90 rename to cuda/impl/psb_d_mv_elg_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_elg_from_fmt.F90 b/cuda/impl/psb_d_mv_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_mv_elg_from_fmt.F90 rename to cuda/impl/psb_d_mv_elg_from_fmt.F90 diff --git a/gpu/impl/psb_d_mv_hdiag_from_coo.F90 b/cuda/impl/psb_d_mv_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_d_mv_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_hlg_from_coo.F90 b/cuda/impl/psb_d_mv_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hlg_from_coo.F90 rename to cuda/impl/psb_d_mv_hlg_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_hlg_from_fmt.F90 b/cuda/impl/psb_d_mv_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_d_mv_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_d_mv_hybg_from_coo.F90 b/cuda/impl/psb_d_mv_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hybg_from_coo.F90 rename to cuda/impl/psb_d_mv_hybg_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_hybg_from_fmt.F90 b/cuda/impl/psb_d_mv_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_d_mv_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_s_cp_csrg_from_coo.F90 b/cuda/impl/psb_s_cp_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_csrg_from_coo.F90 rename to cuda/impl/psb_s_cp_csrg_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cp_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_s_cp_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_s_cp_diag_from_coo.F90 b/cuda/impl/psb_s_cp_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_diag_from_coo.F90 rename to cuda/impl/psb_s_cp_diag_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_elg_from_coo.F90 b/cuda/impl/psb_s_cp_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_elg_from_coo.F90 rename to cuda/impl/psb_s_cp_elg_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_elg_from_fmt.F90 b/cuda/impl/psb_s_cp_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_cp_elg_from_fmt.F90 rename to cuda/impl/psb_s_cp_elg_from_fmt.F90 diff --git a/gpu/impl/psb_s_cp_hdiag_from_coo.F90 b/cuda/impl/psb_s_cp_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_s_cp_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_hlg_from_coo.F90 b/cuda/impl/psb_s_cp_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hlg_from_coo.F90 rename to cuda/impl/psb_s_cp_hlg_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_hlg_from_fmt.F90 b/cuda/impl/psb_s_cp_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_s_cp_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_s_cp_hybg_from_coo.F90 b/cuda/impl/psb_s_cp_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hybg_from_coo.F90 rename to cuda/impl/psb_s_cp_hybg_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_hybg_from_fmt.F90 b/cuda/impl/psb_s_cp_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_s_cp_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_s_csrg_allocate_mnnz.F90 b/cuda/impl/psb_s_csrg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_s_csrg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_s_csrg_csmm.F90 b/cuda/impl/psb_s_csrg_csmm.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_csmm.F90 rename to cuda/impl/psb_s_csrg_csmm.F90 diff --git a/gpu/impl/psb_s_csrg_csmv.F90 b/cuda/impl/psb_s_csrg_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_csmv.F90 rename to cuda/impl/psb_s_csrg_csmv.F90 diff --git a/gpu/impl/psb_s_csrg_from_gpu.F90 b/cuda/impl/psb_s_csrg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_from_gpu.F90 rename to cuda/impl/psb_s_csrg_from_gpu.F90 diff --git a/gpu/impl/psb_s_csrg_inner_vect_sv.F90 b/cuda/impl/psb_s_csrg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_s_csrg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_s_csrg_mold.F90 b/cuda/impl/psb_s_csrg_mold.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_mold.F90 rename to cuda/impl/psb_s_csrg_mold.F90 diff --git a/gpu/impl/psb_s_csrg_reallocate_nz.F90 b/cuda/impl/psb_s_csrg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_reallocate_nz.F90 rename to cuda/impl/psb_s_csrg_reallocate_nz.F90 diff --git a/gpu/impl/psb_s_csrg_scal.F90 b/cuda/impl/psb_s_csrg_scal.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_scal.F90 rename to cuda/impl/psb_s_csrg_scal.F90 diff --git a/gpu/impl/psb_s_csrg_scals.F90 b/cuda/impl/psb_s_csrg_scals.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_scals.F90 rename to cuda/impl/psb_s_csrg_scals.F90 diff --git a/gpu/impl/psb_s_csrg_to_gpu.F90 b/cuda/impl/psb_s_csrg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_to_gpu.F90 rename to cuda/impl/psb_s_csrg_to_gpu.F90 diff --git a/gpu/impl/psb_s_csrg_vect_mv.F90 b/cuda/impl/psb_s_csrg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_vect_mv.F90 rename to cuda/impl/psb_s_csrg_vect_mv.F90 diff --git a/gpu/impl/psb_s_diag_csmv.F90 b/cuda/impl/psb_s_diag_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_diag_csmv.F90 rename to cuda/impl/psb_s_diag_csmv.F90 diff --git a/gpu/impl/psb_s_diag_mold.F90 b/cuda/impl/psb_s_diag_mold.F90 similarity index 100% rename from gpu/impl/psb_s_diag_mold.F90 rename to cuda/impl/psb_s_diag_mold.F90 diff --git a/gpu/impl/psb_s_diag_to_gpu.F90 b/cuda/impl/psb_s_diag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_diag_to_gpu.F90 rename to cuda/impl/psb_s_diag_to_gpu.F90 diff --git a/gpu/impl/psb_s_diag_vect_mv.F90 b/cuda/impl/psb_s_diag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_diag_vect_mv.F90 rename to cuda/impl/psb_s_diag_vect_mv.F90 diff --git a/gpu/impl/psb_s_dnsg_mat_impl.F90 b/cuda/impl/psb_s_dnsg_mat_impl.F90 similarity index 100% rename from gpu/impl/psb_s_dnsg_mat_impl.F90 rename to cuda/impl/psb_s_dnsg_mat_impl.F90 diff --git a/gpu/impl/psb_s_elg_allocate_mnnz.F90 b/cuda/impl/psb_s_elg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_s_elg_allocate_mnnz.F90 rename to cuda/impl/psb_s_elg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_s_elg_asb.f90 b/cuda/impl/psb_s_elg_asb.f90 similarity index 100% rename from gpu/impl/psb_s_elg_asb.f90 rename to cuda/impl/psb_s_elg_asb.f90 diff --git a/gpu/impl/psb_s_elg_csmm.F90 b/cuda/impl/psb_s_elg_csmm.F90 similarity index 100% rename from gpu/impl/psb_s_elg_csmm.F90 rename to cuda/impl/psb_s_elg_csmm.F90 diff --git a/gpu/impl/psb_s_elg_csmv.F90 b/cuda/impl/psb_s_elg_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_elg_csmv.F90 rename to cuda/impl/psb_s_elg_csmv.F90 diff --git a/gpu/impl/psb_s_elg_csput.F90 b/cuda/impl/psb_s_elg_csput.F90 similarity index 100% rename from gpu/impl/psb_s_elg_csput.F90 rename to cuda/impl/psb_s_elg_csput.F90 diff --git a/gpu/impl/psb_s_elg_from_gpu.F90 b/cuda/impl/psb_s_elg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_elg_from_gpu.F90 rename to cuda/impl/psb_s_elg_from_gpu.F90 diff --git a/gpu/impl/psb_s_elg_inner_vect_sv.F90 b/cuda/impl/psb_s_elg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_s_elg_inner_vect_sv.F90 rename to cuda/impl/psb_s_elg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_s_elg_mold.F90 b/cuda/impl/psb_s_elg_mold.F90 similarity index 100% rename from gpu/impl/psb_s_elg_mold.F90 rename to cuda/impl/psb_s_elg_mold.F90 diff --git a/gpu/impl/psb_s_elg_reallocate_nz.F90 b/cuda/impl/psb_s_elg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_s_elg_reallocate_nz.F90 rename to cuda/impl/psb_s_elg_reallocate_nz.F90 diff --git a/gpu/impl/psb_s_elg_scal.F90 b/cuda/impl/psb_s_elg_scal.F90 similarity index 100% rename from gpu/impl/psb_s_elg_scal.F90 rename to cuda/impl/psb_s_elg_scal.F90 diff --git a/gpu/impl/psb_s_elg_scals.F90 b/cuda/impl/psb_s_elg_scals.F90 similarity index 100% rename from gpu/impl/psb_s_elg_scals.F90 rename to cuda/impl/psb_s_elg_scals.F90 diff --git a/gpu/impl/psb_s_elg_to_gpu.F90 b/cuda/impl/psb_s_elg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_elg_to_gpu.F90 rename to cuda/impl/psb_s_elg_to_gpu.F90 diff --git a/gpu/impl/psb_s_elg_trim.f90 b/cuda/impl/psb_s_elg_trim.f90 similarity index 100% rename from gpu/impl/psb_s_elg_trim.f90 rename to cuda/impl/psb_s_elg_trim.f90 diff --git a/gpu/impl/psb_s_elg_vect_mv.F90 b/cuda/impl/psb_s_elg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_elg_vect_mv.F90 rename to cuda/impl/psb_s_elg_vect_mv.F90 diff --git a/gpu/impl/psb_s_hdiag_csmv.F90 b/cuda/impl/psb_s_hdiag_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_hdiag_csmv.F90 rename to cuda/impl/psb_s_hdiag_csmv.F90 diff --git a/gpu/impl/psb_s_hdiag_mold.F90 b/cuda/impl/psb_s_hdiag_mold.F90 similarity index 100% rename from gpu/impl/psb_s_hdiag_mold.F90 rename to cuda/impl/psb_s_hdiag_mold.F90 diff --git a/gpu/impl/psb_s_hdiag_to_gpu.F90 b/cuda/impl/psb_s_hdiag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_hdiag_to_gpu.F90 rename to cuda/impl/psb_s_hdiag_to_gpu.F90 diff --git a/gpu/impl/psb_s_hdiag_vect_mv.F90 b/cuda/impl/psb_s_hdiag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_hdiag_vect_mv.F90 rename to cuda/impl/psb_s_hdiag_vect_mv.F90 diff --git a/gpu/impl/psb_s_hlg_allocate_mnnz.F90 b/cuda/impl/psb_s_hlg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_s_hlg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_s_hlg_csmm.F90 b/cuda/impl/psb_s_hlg_csmm.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_csmm.F90 rename to cuda/impl/psb_s_hlg_csmm.F90 diff --git a/gpu/impl/psb_s_hlg_csmv.F90 b/cuda/impl/psb_s_hlg_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_csmv.F90 rename to cuda/impl/psb_s_hlg_csmv.F90 diff --git a/gpu/impl/psb_s_hlg_from_gpu.F90 b/cuda/impl/psb_s_hlg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_from_gpu.F90 rename to cuda/impl/psb_s_hlg_from_gpu.F90 diff --git a/gpu/impl/psb_s_hlg_inner_vect_sv.F90 b/cuda/impl/psb_s_hlg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_s_hlg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_s_hlg_mold.F90 b/cuda/impl/psb_s_hlg_mold.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_mold.F90 rename to cuda/impl/psb_s_hlg_mold.F90 diff --git a/gpu/impl/psb_s_hlg_reallocate_nz.F90 b/cuda/impl/psb_s_hlg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_reallocate_nz.F90 rename to cuda/impl/psb_s_hlg_reallocate_nz.F90 diff --git a/gpu/impl/psb_s_hlg_scal.F90 b/cuda/impl/psb_s_hlg_scal.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_scal.F90 rename to cuda/impl/psb_s_hlg_scal.F90 diff --git a/gpu/impl/psb_s_hlg_scals.F90 b/cuda/impl/psb_s_hlg_scals.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_scals.F90 rename to cuda/impl/psb_s_hlg_scals.F90 diff --git a/gpu/impl/psb_s_hlg_to_gpu.F90 b/cuda/impl/psb_s_hlg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_to_gpu.F90 rename to cuda/impl/psb_s_hlg_to_gpu.F90 diff --git a/gpu/impl/psb_s_hlg_vect_mv.F90 b/cuda/impl/psb_s_hlg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_vect_mv.F90 rename to cuda/impl/psb_s_hlg_vect_mv.F90 diff --git a/gpu/impl/psb_s_hybg_allocate_mnnz.F90 b/cuda/impl/psb_s_hybg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_s_hybg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_s_hybg_csmm.F90 b/cuda/impl/psb_s_hybg_csmm.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_csmm.F90 rename to cuda/impl/psb_s_hybg_csmm.F90 diff --git a/gpu/impl/psb_s_hybg_csmv.F90 b/cuda/impl/psb_s_hybg_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_csmv.F90 rename to cuda/impl/psb_s_hybg_csmv.F90 diff --git a/gpu/impl/psb_s_hybg_inner_vect_sv.F90 b/cuda/impl/psb_s_hybg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_s_hybg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_s_hybg_mold.F90 b/cuda/impl/psb_s_hybg_mold.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_mold.F90 rename to cuda/impl/psb_s_hybg_mold.F90 diff --git a/gpu/impl/psb_s_hybg_reallocate_nz.F90 b/cuda/impl/psb_s_hybg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_reallocate_nz.F90 rename to cuda/impl/psb_s_hybg_reallocate_nz.F90 diff --git a/gpu/impl/psb_s_hybg_scal.F90 b/cuda/impl/psb_s_hybg_scal.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_scal.F90 rename to cuda/impl/psb_s_hybg_scal.F90 diff --git a/gpu/impl/psb_s_hybg_scals.F90 b/cuda/impl/psb_s_hybg_scals.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_scals.F90 rename to cuda/impl/psb_s_hybg_scals.F90 diff --git a/gpu/impl/psb_s_hybg_to_gpu.F90 b/cuda/impl/psb_s_hybg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_to_gpu.F90 rename to cuda/impl/psb_s_hybg_to_gpu.F90 diff --git a/gpu/impl/psb_s_hybg_vect_mv.F90 b/cuda/impl/psb_s_hybg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_vect_mv.F90 rename to cuda/impl/psb_s_hybg_vect_mv.F90 diff --git a/gpu/impl/psb_s_mv_csrg_from_coo.F90 b/cuda/impl/psb_s_mv_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_csrg_from_coo.F90 rename to cuda/impl/psb_s_mv_csrg_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_csrg_from_fmt.F90 b/cuda/impl/psb_s_mv_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_s_mv_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_s_mv_diag_from_coo.F90 b/cuda/impl/psb_s_mv_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_diag_from_coo.F90 rename to cuda/impl/psb_s_mv_diag_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_elg_from_coo.F90 b/cuda/impl/psb_s_mv_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_elg_from_coo.F90 rename to cuda/impl/psb_s_mv_elg_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_elg_from_fmt.F90 b/cuda/impl/psb_s_mv_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_mv_elg_from_fmt.F90 rename to cuda/impl/psb_s_mv_elg_from_fmt.F90 diff --git a/gpu/impl/psb_s_mv_hdiag_from_coo.F90 b/cuda/impl/psb_s_mv_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_s_mv_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_hlg_from_coo.F90 b/cuda/impl/psb_s_mv_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hlg_from_coo.F90 rename to cuda/impl/psb_s_mv_hlg_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_hlg_from_fmt.F90 b/cuda/impl/psb_s_mv_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_s_mv_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_s_mv_hybg_from_coo.F90 b/cuda/impl/psb_s_mv_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hybg_from_coo.F90 rename to cuda/impl/psb_s_mv_hybg_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_hybg_from_fmt.F90 b/cuda/impl/psb_s_mv_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_s_mv_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_z_cp_csrg_from_coo.F90 b/cuda/impl/psb_z_cp_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_csrg_from_coo.F90 rename to cuda/impl/psb_z_cp_csrg_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cp_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_z_cp_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_z_cp_diag_from_coo.F90 b/cuda/impl/psb_z_cp_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_diag_from_coo.F90 rename to cuda/impl/psb_z_cp_diag_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_elg_from_coo.F90 b/cuda/impl/psb_z_cp_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_elg_from_coo.F90 rename to cuda/impl/psb_z_cp_elg_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_elg_from_fmt.F90 b/cuda/impl/psb_z_cp_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_cp_elg_from_fmt.F90 rename to cuda/impl/psb_z_cp_elg_from_fmt.F90 diff --git a/gpu/impl/psb_z_cp_hdiag_from_coo.F90 b/cuda/impl/psb_z_cp_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_z_cp_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_hlg_from_coo.F90 b/cuda/impl/psb_z_cp_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hlg_from_coo.F90 rename to cuda/impl/psb_z_cp_hlg_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_hlg_from_fmt.F90 b/cuda/impl/psb_z_cp_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_z_cp_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_z_cp_hybg_from_coo.F90 b/cuda/impl/psb_z_cp_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hybg_from_coo.F90 rename to cuda/impl/psb_z_cp_hybg_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_hybg_from_fmt.F90 b/cuda/impl/psb_z_cp_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_z_cp_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_z_csrg_allocate_mnnz.F90 b/cuda/impl/psb_z_csrg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_z_csrg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_z_csrg_csmm.F90 b/cuda/impl/psb_z_csrg_csmm.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_csmm.F90 rename to cuda/impl/psb_z_csrg_csmm.F90 diff --git a/gpu/impl/psb_z_csrg_csmv.F90 b/cuda/impl/psb_z_csrg_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_csmv.F90 rename to cuda/impl/psb_z_csrg_csmv.F90 diff --git a/gpu/impl/psb_z_csrg_from_gpu.F90 b/cuda/impl/psb_z_csrg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_from_gpu.F90 rename to cuda/impl/psb_z_csrg_from_gpu.F90 diff --git a/gpu/impl/psb_z_csrg_inner_vect_sv.F90 b/cuda/impl/psb_z_csrg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_z_csrg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_z_csrg_mold.F90 b/cuda/impl/psb_z_csrg_mold.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_mold.F90 rename to cuda/impl/psb_z_csrg_mold.F90 diff --git a/gpu/impl/psb_z_csrg_reallocate_nz.F90 b/cuda/impl/psb_z_csrg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_reallocate_nz.F90 rename to cuda/impl/psb_z_csrg_reallocate_nz.F90 diff --git a/gpu/impl/psb_z_csrg_scal.F90 b/cuda/impl/psb_z_csrg_scal.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_scal.F90 rename to cuda/impl/psb_z_csrg_scal.F90 diff --git a/gpu/impl/psb_z_csrg_scals.F90 b/cuda/impl/psb_z_csrg_scals.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_scals.F90 rename to cuda/impl/psb_z_csrg_scals.F90 diff --git a/gpu/impl/psb_z_csrg_to_gpu.F90 b/cuda/impl/psb_z_csrg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_to_gpu.F90 rename to cuda/impl/psb_z_csrg_to_gpu.F90 diff --git a/gpu/impl/psb_z_csrg_vect_mv.F90 b/cuda/impl/psb_z_csrg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_vect_mv.F90 rename to cuda/impl/psb_z_csrg_vect_mv.F90 diff --git a/gpu/impl/psb_z_diag_csmv.F90 b/cuda/impl/psb_z_diag_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_diag_csmv.F90 rename to cuda/impl/psb_z_diag_csmv.F90 diff --git a/gpu/impl/psb_z_diag_mold.F90 b/cuda/impl/psb_z_diag_mold.F90 similarity index 100% rename from gpu/impl/psb_z_diag_mold.F90 rename to cuda/impl/psb_z_diag_mold.F90 diff --git a/gpu/impl/psb_z_diag_to_gpu.F90 b/cuda/impl/psb_z_diag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_diag_to_gpu.F90 rename to cuda/impl/psb_z_diag_to_gpu.F90 diff --git a/gpu/impl/psb_z_diag_vect_mv.F90 b/cuda/impl/psb_z_diag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_diag_vect_mv.F90 rename to cuda/impl/psb_z_diag_vect_mv.F90 diff --git a/gpu/impl/psb_z_dnsg_mat_impl.F90 b/cuda/impl/psb_z_dnsg_mat_impl.F90 similarity index 100% rename from gpu/impl/psb_z_dnsg_mat_impl.F90 rename to cuda/impl/psb_z_dnsg_mat_impl.F90 diff --git a/gpu/impl/psb_z_elg_allocate_mnnz.F90 b/cuda/impl/psb_z_elg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_z_elg_allocate_mnnz.F90 rename to cuda/impl/psb_z_elg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_z_elg_asb.f90 b/cuda/impl/psb_z_elg_asb.f90 similarity index 100% rename from gpu/impl/psb_z_elg_asb.f90 rename to cuda/impl/psb_z_elg_asb.f90 diff --git a/gpu/impl/psb_z_elg_csmm.F90 b/cuda/impl/psb_z_elg_csmm.F90 similarity index 100% rename from gpu/impl/psb_z_elg_csmm.F90 rename to cuda/impl/psb_z_elg_csmm.F90 diff --git a/gpu/impl/psb_z_elg_csmv.F90 b/cuda/impl/psb_z_elg_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_elg_csmv.F90 rename to cuda/impl/psb_z_elg_csmv.F90 diff --git a/gpu/impl/psb_z_elg_csput.F90 b/cuda/impl/psb_z_elg_csput.F90 similarity index 100% rename from gpu/impl/psb_z_elg_csput.F90 rename to cuda/impl/psb_z_elg_csput.F90 diff --git a/gpu/impl/psb_z_elg_from_gpu.F90 b/cuda/impl/psb_z_elg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_elg_from_gpu.F90 rename to cuda/impl/psb_z_elg_from_gpu.F90 diff --git a/gpu/impl/psb_z_elg_inner_vect_sv.F90 b/cuda/impl/psb_z_elg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_z_elg_inner_vect_sv.F90 rename to cuda/impl/psb_z_elg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_z_elg_mold.F90 b/cuda/impl/psb_z_elg_mold.F90 similarity index 100% rename from gpu/impl/psb_z_elg_mold.F90 rename to cuda/impl/psb_z_elg_mold.F90 diff --git a/gpu/impl/psb_z_elg_reallocate_nz.F90 b/cuda/impl/psb_z_elg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_z_elg_reallocate_nz.F90 rename to cuda/impl/psb_z_elg_reallocate_nz.F90 diff --git a/gpu/impl/psb_z_elg_scal.F90 b/cuda/impl/psb_z_elg_scal.F90 similarity index 100% rename from gpu/impl/psb_z_elg_scal.F90 rename to cuda/impl/psb_z_elg_scal.F90 diff --git a/gpu/impl/psb_z_elg_scals.F90 b/cuda/impl/psb_z_elg_scals.F90 similarity index 100% rename from gpu/impl/psb_z_elg_scals.F90 rename to cuda/impl/psb_z_elg_scals.F90 diff --git a/gpu/impl/psb_z_elg_to_gpu.F90 b/cuda/impl/psb_z_elg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_elg_to_gpu.F90 rename to cuda/impl/psb_z_elg_to_gpu.F90 diff --git a/gpu/impl/psb_z_elg_trim.f90 b/cuda/impl/psb_z_elg_trim.f90 similarity index 100% rename from gpu/impl/psb_z_elg_trim.f90 rename to cuda/impl/psb_z_elg_trim.f90 diff --git a/gpu/impl/psb_z_elg_vect_mv.F90 b/cuda/impl/psb_z_elg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_elg_vect_mv.F90 rename to cuda/impl/psb_z_elg_vect_mv.F90 diff --git a/gpu/impl/psb_z_hdiag_csmv.F90 b/cuda/impl/psb_z_hdiag_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_hdiag_csmv.F90 rename to cuda/impl/psb_z_hdiag_csmv.F90 diff --git a/gpu/impl/psb_z_hdiag_mold.F90 b/cuda/impl/psb_z_hdiag_mold.F90 similarity index 100% rename from gpu/impl/psb_z_hdiag_mold.F90 rename to cuda/impl/psb_z_hdiag_mold.F90 diff --git a/gpu/impl/psb_z_hdiag_to_gpu.F90 b/cuda/impl/psb_z_hdiag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_hdiag_to_gpu.F90 rename to cuda/impl/psb_z_hdiag_to_gpu.F90 diff --git a/gpu/impl/psb_z_hdiag_vect_mv.F90 b/cuda/impl/psb_z_hdiag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_hdiag_vect_mv.F90 rename to cuda/impl/psb_z_hdiag_vect_mv.F90 diff --git a/gpu/impl/psb_z_hlg_allocate_mnnz.F90 b/cuda/impl/psb_z_hlg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_z_hlg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_z_hlg_csmm.F90 b/cuda/impl/psb_z_hlg_csmm.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_csmm.F90 rename to cuda/impl/psb_z_hlg_csmm.F90 diff --git a/gpu/impl/psb_z_hlg_csmv.F90 b/cuda/impl/psb_z_hlg_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_csmv.F90 rename to cuda/impl/psb_z_hlg_csmv.F90 diff --git a/gpu/impl/psb_z_hlg_from_gpu.F90 b/cuda/impl/psb_z_hlg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_from_gpu.F90 rename to cuda/impl/psb_z_hlg_from_gpu.F90 diff --git a/gpu/impl/psb_z_hlg_inner_vect_sv.F90 b/cuda/impl/psb_z_hlg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_z_hlg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_z_hlg_mold.F90 b/cuda/impl/psb_z_hlg_mold.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_mold.F90 rename to cuda/impl/psb_z_hlg_mold.F90 diff --git a/gpu/impl/psb_z_hlg_reallocate_nz.F90 b/cuda/impl/psb_z_hlg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_reallocate_nz.F90 rename to cuda/impl/psb_z_hlg_reallocate_nz.F90 diff --git a/gpu/impl/psb_z_hlg_scal.F90 b/cuda/impl/psb_z_hlg_scal.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_scal.F90 rename to cuda/impl/psb_z_hlg_scal.F90 diff --git a/gpu/impl/psb_z_hlg_scals.F90 b/cuda/impl/psb_z_hlg_scals.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_scals.F90 rename to cuda/impl/psb_z_hlg_scals.F90 diff --git a/gpu/impl/psb_z_hlg_to_gpu.F90 b/cuda/impl/psb_z_hlg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_to_gpu.F90 rename to cuda/impl/psb_z_hlg_to_gpu.F90 diff --git a/gpu/impl/psb_z_hlg_vect_mv.F90 b/cuda/impl/psb_z_hlg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_vect_mv.F90 rename to cuda/impl/psb_z_hlg_vect_mv.F90 diff --git a/gpu/impl/psb_z_hybg_allocate_mnnz.F90 b/cuda/impl/psb_z_hybg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_z_hybg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_z_hybg_csmm.F90 b/cuda/impl/psb_z_hybg_csmm.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_csmm.F90 rename to cuda/impl/psb_z_hybg_csmm.F90 diff --git a/gpu/impl/psb_z_hybg_csmv.F90 b/cuda/impl/psb_z_hybg_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_csmv.F90 rename to cuda/impl/psb_z_hybg_csmv.F90 diff --git a/gpu/impl/psb_z_hybg_inner_vect_sv.F90 b/cuda/impl/psb_z_hybg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_z_hybg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_z_hybg_mold.F90 b/cuda/impl/psb_z_hybg_mold.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_mold.F90 rename to cuda/impl/psb_z_hybg_mold.F90 diff --git a/gpu/impl/psb_z_hybg_reallocate_nz.F90 b/cuda/impl/psb_z_hybg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_reallocate_nz.F90 rename to cuda/impl/psb_z_hybg_reallocate_nz.F90 diff --git a/gpu/impl/psb_z_hybg_scal.F90 b/cuda/impl/psb_z_hybg_scal.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_scal.F90 rename to cuda/impl/psb_z_hybg_scal.F90 diff --git a/gpu/impl/psb_z_hybg_scals.F90 b/cuda/impl/psb_z_hybg_scals.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_scals.F90 rename to cuda/impl/psb_z_hybg_scals.F90 diff --git a/gpu/impl/psb_z_hybg_to_gpu.F90 b/cuda/impl/psb_z_hybg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_to_gpu.F90 rename to cuda/impl/psb_z_hybg_to_gpu.F90 diff --git a/gpu/impl/psb_z_hybg_vect_mv.F90 b/cuda/impl/psb_z_hybg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_vect_mv.F90 rename to cuda/impl/psb_z_hybg_vect_mv.F90 diff --git a/gpu/impl/psb_z_mv_csrg_from_coo.F90 b/cuda/impl/psb_z_mv_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_csrg_from_coo.F90 rename to cuda/impl/psb_z_mv_csrg_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_csrg_from_fmt.F90 b/cuda/impl/psb_z_mv_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_z_mv_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_z_mv_diag_from_coo.F90 b/cuda/impl/psb_z_mv_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_diag_from_coo.F90 rename to cuda/impl/psb_z_mv_diag_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_elg_from_coo.F90 b/cuda/impl/psb_z_mv_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_elg_from_coo.F90 rename to cuda/impl/psb_z_mv_elg_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_elg_from_fmt.F90 b/cuda/impl/psb_z_mv_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_mv_elg_from_fmt.F90 rename to cuda/impl/psb_z_mv_elg_from_fmt.F90 diff --git a/gpu/impl/psb_z_mv_hdiag_from_coo.F90 b/cuda/impl/psb_z_mv_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_z_mv_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_hlg_from_coo.F90 b/cuda/impl/psb_z_mv_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hlg_from_coo.F90 rename to cuda/impl/psb_z_mv_hlg_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_hlg_from_fmt.F90 b/cuda/impl/psb_z_mv_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_z_mv_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_z_mv_hybg_from_coo.F90 b/cuda/impl/psb_z_mv_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hybg_from_coo.F90 rename to cuda/impl/psb_z_mv_hybg_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_hybg_from_fmt.F90 b/cuda/impl/psb_z_mv_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_z_mv_hybg_from_fmt.F90 diff --git a/gpu/ivectordev.c b/cuda/ivectordev.c similarity index 100% rename from gpu/ivectordev.c rename to cuda/ivectordev.c diff --git a/gpu/ivectordev.h b/cuda/ivectordev.h similarity index 100% rename from gpu/ivectordev.h rename to cuda/ivectordev.h diff --git a/gpu/psb_base_vectordev_mod.F90 b/cuda/psb_base_vectordev_mod.F90 similarity index 100% rename from gpu/psb_base_vectordev_mod.F90 rename to cuda/psb_base_vectordev_mod.F90 diff --git a/gpu/psb_c_csrg_mat_mod.F90 b/cuda/psb_c_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_csrg_mat_mod.F90 rename to cuda/psb_c_csrg_mat_mod.F90 diff --git a/gpu/psb_c_diag_mat_mod.F90 b/cuda/psb_c_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_c_diag_mat_mod.F90 rename to cuda/psb_c_diag_mat_mod.F90 diff --git a/gpu/psb_c_dnsg_mat_mod.F90 b/cuda/psb_c_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_dnsg_mat_mod.F90 rename to cuda/psb_c_dnsg_mat_mod.F90 diff --git a/gpu/psb_c_elg_mat_mod.F90 b/cuda/psb_c_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_elg_mat_mod.F90 rename to cuda/psb_c_elg_mat_mod.F90 diff --git a/gpu/psb_c_gpu_vect_mod.F90 b/cuda/psb_c_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_c_gpu_vect_mod.F90 rename to cuda/psb_c_gpu_vect_mod.F90 diff --git a/gpu/psb_c_hdiag_mat_mod.F90 b/cuda/psb_c_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_c_hdiag_mat_mod.F90 rename to cuda/psb_c_hdiag_mat_mod.F90 diff --git a/gpu/psb_c_hlg_mat_mod.F90 b/cuda/psb_c_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_hlg_mat_mod.F90 rename to cuda/psb_c_hlg_mat_mod.F90 diff --git a/gpu/psb_c_hybg_mat_mod.F90 b/cuda/psb_c_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_hybg_mat_mod.F90 rename to cuda/psb_c_hybg_mat_mod.F90 diff --git a/gpu/psb_c_vectordev_mod.F90 b/cuda/psb_c_vectordev_mod.F90 similarity index 100% rename from gpu/psb_c_vectordev_mod.F90 rename to cuda/psb_c_vectordev_mod.F90 diff --git a/gpu/psb_d_csrg_mat_mod.F90 b/cuda/psb_d_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_csrg_mat_mod.F90 rename to cuda/psb_d_csrg_mat_mod.F90 diff --git a/gpu/psb_d_diag_mat_mod.F90 b/cuda/psb_d_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_d_diag_mat_mod.F90 rename to cuda/psb_d_diag_mat_mod.F90 diff --git a/gpu/psb_d_dnsg_mat_mod.F90 b/cuda/psb_d_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_dnsg_mat_mod.F90 rename to cuda/psb_d_dnsg_mat_mod.F90 diff --git a/gpu/psb_d_elg_mat_mod.F90 b/cuda/psb_d_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_elg_mat_mod.F90 rename to cuda/psb_d_elg_mat_mod.F90 diff --git a/gpu/psb_d_gpu_vect_mod.F90 b/cuda/psb_d_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_d_gpu_vect_mod.F90 rename to cuda/psb_d_gpu_vect_mod.F90 diff --git a/gpu/psb_d_hdiag_mat_mod.F90 b/cuda/psb_d_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_d_hdiag_mat_mod.F90 rename to cuda/psb_d_hdiag_mat_mod.F90 diff --git a/gpu/psb_d_hlg_mat_mod.F90 b/cuda/psb_d_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_hlg_mat_mod.F90 rename to cuda/psb_d_hlg_mat_mod.F90 diff --git a/gpu/psb_d_hybg_mat_mod.F90 b/cuda/psb_d_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_hybg_mat_mod.F90 rename to cuda/psb_d_hybg_mat_mod.F90 diff --git a/gpu/psb_d_vectordev_mod.F90 b/cuda/psb_d_vectordev_mod.F90 similarity index 100% rename from gpu/psb_d_vectordev_mod.F90 rename to cuda/psb_d_vectordev_mod.F90 diff --git a/gpu/psb_gpu_env_mod.F90 b/cuda/psb_gpu_env_mod.F90 similarity index 100% rename from gpu/psb_gpu_env_mod.F90 rename to cuda/psb_gpu_env_mod.F90 diff --git a/gpu/psb_gpu_mod.F90 b/cuda/psb_gpu_mod.F90 similarity index 100% rename from gpu/psb_gpu_mod.F90 rename to cuda/psb_gpu_mod.F90 diff --git a/gpu/psb_i_csrg_mat_mod.F90 b/cuda/psb_i_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_csrg_mat_mod.F90 rename to cuda/psb_i_csrg_mat_mod.F90 diff --git a/gpu/psb_i_diag_mat_mod.F90 b/cuda/psb_i_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_i_diag_mat_mod.F90 rename to cuda/psb_i_diag_mat_mod.F90 diff --git a/gpu/psb_i_dnsg_mat_mod.F90 b/cuda/psb_i_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_dnsg_mat_mod.F90 rename to cuda/psb_i_dnsg_mat_mod.F90 diff --git a/gpu/psb_i_elg_mat_mod.F90 b/cuda/psb_i_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_elg_mat_mod.F90 rename to cuda/psb_i_elg_mat_mod.F90 diff --git a/gpu/psb_i_gpu_vect_mod.F90 b/cuda/psb_i_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_i_gpu_vect_mod.F90 rename to cuda/psb_i_gpu_vect_mod.F90 diff --git a/gpu/psb_i_hdiag_mat_mod.F90 b/cuda/psb_i_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_i_hdiag_mat_mod.F90 rename to cuda/psb_i_hdiag_mat_mod.F90 diff --git a/gpu/psb_i_hlg_mat_mod.F90 b/cuda/psb_i_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_hlg_mat_mod.F90 rename to cuda/psb_i_hlg_mat_mod.F90 diff --git a/gpu/psb_i_hybg_mat_mod.F90 b/cuda/psb_i_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_hybg_mat_mod.F90 rename to cuda/psb_i_hybg_mat_mod.F90 diff --git a/gpu/psb_i_vectordev_mod.F90 b/cuda/psb_i_vectordev_mod.F90 similarity index 100% rename from gpu/psb_i_vectordev_mod.F90 rename to cuda/psb_i_vectordev_mod.F90 diff --git a/gpu/psb_s_csrg_mat_mod.F90 b/cuda/psb_s_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_csrg_mat_mod.F90 rename to cuda/psb_s_csrg_mat_mod.F90 diff --git a/gpu/psb_s_diag_mat_mod.F90 b/cuda/psb_s_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_s_diag_mat_mod.F90 rename to cuda/psb_s_diag_mat_mod.F90 diff --git a/gpu/psb_s_dnsg_mat_mod.F90 b/cuda/psb_s_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_dnsg_mat_mod.F90 rename to cuda/psb_s_dnsg_mat_mod.F90 diff --git a/gpu/psb_s_elg_mat_mod.F90 b/cuda/psb_s_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_elg_mat_mod.F90 rename to cuda/psb_s_elg_mat_mod.F90 diff --git a/gpu/psb_s_gpu_vect_mod.F90 b/cuda/psb_s_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_s_gpu_vect_mod.F90 rename to cuda/psb_s_gpu_vect_mod.F90 diff --git a/gpu/psb_s_hdiag_mat_mod.F90 b/cuda/psb_s_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_s_hdiag_mat_mod.F90 rename to cuda/psb_s_hdiag_mat_mod.F90 diff --git a/gpu/psb_s_hlg_mat_mod.F90 b/cuda/psb_s_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_hlg_mat_mod.F90 rename to cuda/psb_s_hlg_mat_mod.F90 diff --git a/gpu/psb_s_hybg_mat_mod.F90 b/cuda/psb_s_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_hybg_mat_mod.F90 rename to cuda/psb_s_hybg_mat_mod.F90 diff --git a/gpu/psb_s_vectordev_mod.F90 b/cuda/psb_s_vectordev_mod.F90 similarity index 100% rename from gpu/psb_s_vectordev_mod.F90 rename to cuda/psb_s_vectordev_mod.F90 diff --git a/gpu/psb_vectordev_mod.f90 b/cuda/psb_vectordev_mod.f90 similarity index 100% rename from gpu/psb_vectordev_mod.f90 rename to cuda/psb_vectordev_mod.f90 diff --git a/gpu/psb_z_csrg_mat_mod.F90 b/cuda/psb_z_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_csrg_mat_mod.F90 rename to cuda/psb_z_csrg_mat_mod.F90 diff --git a/gpu/psb_z_diag_mat_mod.F90 b/cuda/psb_z_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_z_diag_mat_mod.F90 rename to cuda/psb_z_diag_mat_mod.F90 diff --git a/gpu/psb_z_dnsg_mat_mod.F90 b/cuda/psb_z_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_dnsg_mat_mod.F90 rename to cuda/psb_z_dnsg_mat_mod.F90 diff --git a/gpu/psb_z_elg_mat_mod.F90 b/cuda/psb_z_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_elg_mat_mod.F90 rename to cuda/psb_z_elg_mat_mod.F90 diff --git a/gpu/psb_z_gpu_vect_mod.F90 b/cuda/psb_z_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_z_gpu_vect_mod.F90 rename to cuda/psb_z_gpu_vect_mod.F90 diff --git a/gpu/psb_z_hdiag_mat_mod.F90 b/cuda/psb_z_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_z_hdiag_mat_mod.F90 rename to cuda/psb_z_hdiag_mat_mod.F90 diff --git a/gpu/psb_z_hlg_mat_mod.F90 b/cuda/psb_z_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_hlg_mat_mod.F90 rename to cuda/psb_z_hlg_mat_mod.F90 diff --git a/gpu/psb_z_hybg_mat_mod.F90 b/cuda/psb_z_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_hybg_mat_mod.F90 rename to cuda/psb_z_hybg_mat_mod.F90 diff --git a/gpu/psb_z_vectordev_mod.F90 b/cuda/psb_z_vectordev_mod.F90 similarity index 100% rename from gpu/psb_z_vectordev_mod.F90 rename to cuda/psb_z_vectordev_mod.F90 diff --git a/gpu/s_cusparse_mod.F90 b/cuda/s_cusparse_mod.F90 similarity index 100% rename from gpu/s_cusparse_mod.F90 rename to cuda/s_cusparse_mod.F90 diff --git a/gpu/scusparse.c b/cuda/scusparse.c similarity index 100% rename from gpu/scusparse.c rename to cuda/scusparse.c diff --git a/cuda/spgpu/Makefile b/cuda/spgpu/Makefile new file mode 100644 index 00000000..85801942 --- /dev/null +++ b/cuda/spgpu/Makefile @@ -0,0 +1,39 @@ +TOP=../.. +include $(TOP)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOP)/lib +INCDIR=$(TOP)/include +MODDIR=$(TOP)/modules +LIBNAME=libspgpu.a + +OBJS=coo.o core.o dia.o ell.o hdia.o hell.o + +all: includes objs + +objs: $(OBJS) iobjs + +lib: objs iobjs ilib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +iobjs: + $(MAKE) -C kernels objs +ilib: + $(MAKE) -C kernels lib LIBNAME=$(LIBNAME) + +includes: + /bin/cp -p *.h $(INCDIR) + +clean: iclean + /bin/rm -fr $(OBJS) $(LIBNAME) +iclean: + $(MAKE) -C kernels clean +.c.o: + $(CC) $(CCOPT) $(CINCLUDES) $(CDEFINES) $(CUDA_INCLUDES) -c $< +.cpp.o: + $(CXX) $(CXXOPT) $(CXXINCLUDES) $(CXXDEFINES) $(CUDA_INCLUDES) -c $< -o $@ + +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) $(CUDA_INCLUDES) -c $< diff --git a/cuda/spgpu/coo.cpp b/cuda/spgpu/coo.cpp new file mode 100644 index 00000000..eaeba95e --- /dev/null +++ b/cuda/spgpu/coo.cpp @@ -0,0 +1,98 @@ +#include "coo_conv.h" +#include "core.h" + +#include + +// returns the number of non-zero blocks +int computeBcooSize(int blockRows, int blockCols, const int* rows, const int* cols, int nonZeros) +{ + // use a map to count al non zero blocks + std::map blocksPositions; + + int blockCount = 0; + + int i; + for (i=0; i::iterator it = blocksPositions.find(blockId); + + // not found + if(it == blocksPositions.end()) + { + blocksPositions[blockId] = blockCount; + ++blockCount; + } + } + + return blockCount; +} + + +void cooToBcoo(int* bRows, int* bCols, void* blockValues, /*int isBlockColumnMajor,*/ int blockRows, int blockCols, + const int* rows, const int* cols, const void* values, int nonZeros, spgpuType_t valuesType) +{ + // use a map to count al non zero blocks + std::map blocksPositions; + + int blockCount = 0; + + size_t elementSize = spgpuSizeOf(valuesType); + size_t blockElementSize = elementSize*blockRows*blockCols; + int i; + for (i=0; i::iterator it = blocksPositions.find(blockId); + + int blockPos; + + // not found + if(it == blocksPositions.end()) + { + blocksPositions[blockId] = blockCount; + blockPos = blockCount; + + bRows[blockCount] = blockRowId; + bCols[blockCount] = blockColId; + + memset((char*)blockValues + blockCount*blockElementSize, 0, blockElementSize); + + ++blockCount; + } + else + blockPos = it->second; + + int blockRowOffset = rowId % blockRows; + int blockColOffset = colId % blockCols; + + int blockOffset; + + //if (isBlockColumnMajor) + blockOffset = blockRowOffset + blockColOffset*blockRows; + /*else + blockOffset = blockRowOffset*blockCols + blockColOffset; + */ + + char* dest = (char*)blockValues + blockPos*blockElementSize + blockOffset*elementSize; + const char* src = (const char*)values + elementSize*i; + + memcpy(dest, src, elementSize); + } +} + + diff --git a/cuda/spgpu/coo_conv.h b/cuda/spgpu/coo_conv.h new file mode 100644 index 00000000..feccb63a --- /dev/null +++ b/cuda/spgpu/coo_conv.h @@ -0,0 +1,40 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include "core.h" + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int computeBcooSize(int blockRows, int blockCols, const int* rows, const int* cols, int nonZeros); + +// column-major format for blocks +void cooToBcoo(int* bRows, int* bCols, void* blockValues, /*int isBlockColumnMajor,*/ int blockRows, int blockCols, + const int* rows, const int* cols, const void* values, int nonZeros, spgpuType_t valuesType); + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/core.c b/cuda/spgpu/core.c new file mode 100644 index 00000000..e89aa2cf --- /dev/null +++ b/cuda/spgpu/core.c @@ -0,0 +1,109 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" +#include "stdlib.h" +#include "cuda_runtime.h" + +spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device) +{ + struct cudaDeviceProp deviceProperties; + cudaError_t err = cudaGetDeviceProperties(&deviceProperties, device); + + SpgpuHandleStruct* handle = (SpgpuHandleStruct*) malloc(sizeof(SpgpuHandleStruct)); + + int currentDevice; + cudaGetDevice(¤tDevice); + cudaSetDevice(device); + cudaStreamCreate(&handle->defaultStream); + handle->currentStream = handle->defaultStream; + cudaSetDevice(currentDevice); + + handle->device = device; + handle->warpSize = deviceProperties.warpSize; + handle->maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; + handle->multiProcessorCount = deviceProperties.multiProcessorCount; + handle->maxGridSizeX = deviceProperties.maxGridSize[0]; + handle->maxGridSizeY = deviceProperties.maxGridSize[1]; + handle->maxGridSizeZ = deviceProperties.maxGridSize[2]; + handle->capabilityMajor = deviceProperties.major; + handle->capabilityMinor = deviceProperties.minor; + + *pHandle = handle; + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else + return SPGPU_UNSPECIFIED; +} + +void spgpuDestroy(spgpuHandle_t pHandle) +{ + cudaStreamDestroy(pHandle->defaultStream); + + free((void*)pHandle); +} + +void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream) +{ + int currentDevice; + cudaGetDevice(¤tDevice); + cudaSetDevice(pHandle->device); + cudaStreamCreate(stream); + cudaSetDevice(currentDevice); +} + +void spgpuStreamDestroy(cudaStream_t stream) +{ + cudaStreamDestroy(stream); +} + +void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream) +{ + SpgpuHandleStruct* handle = (SpgpuHandleStruct*)pHandle; + + if (stream) + { + handle->currentStream = stream; + } + else + handle->currentStream = pHandle->defaultStream; +} + +cudaStream_t spgpuGetStream(spgpuHandle_t pHandle) +{ + SpgpuHandleStruct* handle = (SpgpuHandleStruct*)pHandle; + return handle->currentStream; +} + +size_t spgpuSizeOf(spgpuType_t typeCode) +{ + switch (typeCode) + { + case SPGPU_TYPE_INT: + return sizeof(int); + case SPGPU_TYPE_FLOAT: + return sizeof(float); + case SPGPU_TYPE_DOUBLE: + return sizeof(double); + case SPGPU_TYPE_COMPLEX_FLOAT: + return sizeof(cuFloatComplex); + case SPGPU_TYPE_COMPLEX_DOUBLE: + return sizeof(cuDoubleComplex); + default: + return 0; // error + } +} diff --git a/cuda/spgpu/core.h b/cuda/spgpu/core.h new file mode 100644 index 00000000..46d98f64 --- /dev/null +++ b/cuda/spgpu/core.h @@ -0,0 +1,185 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +/*! \mainpage The spGPU library documentation + * + * \section intro_sec Introduction + * + * spGPU is a set of custom matrix storages and CUDA kernels for sparse linear algebra computing on GPU. It isn't a replacement for cuBLAS/cuSPARSE that should be used for a full featured linear algebra environment on GPU.\n + * The main matrix storage used by spGPU is a GPU-friendly ELLpack format, as well as our HELL (Hacked ELLpack) format, an enhanced version of ELLpack with some interesting memory saving properties.\n + * HELL format provides a better memory storage compared to ELL (it avoids allocation inefficency provided by spikes in row sizes), while providing quite the same performances for sparse matrix-vector multiply routine.. + * + * \section install_sec How to build spgpu + * \subsection linuxbuild Linux (and other unix systems) + * cd spgpu/build/cmake\n + * sh configure.sh\n + * make + * \section cr_sec Copyright + * Copyright (C) 2010 - 2015\n + * Davide Barbieri - University of Rome Tor Vergata\n + * Valeria Cardellini - University of Rome Tor Vergata\n + * Salvatore Filippone - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or\n + * modify it under the terms of the GNU General Public License\n + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful,\n + * but WITHOUT ANY WARRANTY; without even the implied warranty of\n + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n + * GNU General Public License for more details. + */ + + +#include "driver_types.h" +#include "cuComplex.h" + +/** \addtogroup coreFun Core Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/// __host pointers reference host allocations (it's just a placeholder) +#define __host +/// __device pointers reference device allocations (it's just a placeholder) +#define __device + +/// The return code for synchronous functions +typedef int spgpuStatus_t; + +#define SPGPU_SUCCESS 0 +#define SPGPU_UNSUPPORTED 1 +#define SPGPU_UNSPECIFIED 2 +#define SPGPU_OUTOFMEMORY 3 + +/// Code to identify a primitive type +typedef int spgpuType_t; + +#define SPGPU_TYPE_INT 0 +#define SPGPU_TYPE_FLOAT 1 +#define SPGPU_TYPE_DOUBLE 2 +#define SPGPU_TYPE_COMPLEX_FLOAT 3 +#define SPGPU_TYPE_COMPLEX_DOUBLE 4 + +/// this struct should be modified only internally by spgpu +typedef struct spgpuHandleStruct { + /// the current stream used by every calls on spgpu routines (passing this handle) + cudaStream_t currentStream; + /// the default stream, created during the handle creation. + cudaStream_t defaultStream; + /// the device associated to this handle + int device; + /// the warp size for this device + int warpSize; + /// the max threads per block count for this device + int maxThreadsPerBlock; + /// the max size for the X coordinate of the grid dimensions + int maxGridSizeX; + /// the max size for the Y coordinate of the grid dimensions + int maxGridSizeY; + /// the max size for the Z coordinate of the grid dimensions + int maxGridSizeZ; + /// Number of SM + int multiProcessorCount; + // compute capability + int capabilityMajor; + int capabilityMinor; +} SpgpuHandleStruct; + +/// A spGPU handle represents a single CUDA device on your platform. +typedef const SpgpuHandleStruct* spgpuHandle_t; + +/** +* \fn spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device) +* Create a spgpu context for a GPU device. Every call to spgpu routines using this +* handle will execute on the same GPU. This is re-entrant, so it will work if used by multiple host threads. +* \param pHandle outputs the handle +* \param device id of the device to be used by this context +*/ +spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device); + +/** +* \fn void spgpuDestroy(spgpuHandle_t pHandle) +* Destroy the spgpu context for pHandle. +* \param pHandle the handle previously created with spgpuCreate(). +*/ +void spgpuDestroy(spgpuHandle_t pHandle); + +/** +* \fn void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream) +* Create a cuda stream according to the device of the spgpu handle. +* \param stream outputs the new stream +* \param pHandle the handle used to obtain the device id for the stream +*/ +void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream); + +/** +* \fn void spgpuStreamDestroy(cudaStream_t stream) +* Destroy a stream, previously created with spgpuStreamCreate(). +* \param stream the stream to destroy +*/ +void spgpuStreamDestroy(cudaStream_t stream); + +/** +* \fn void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream) +* Change the current stream for the handle pHandle. +* \param pHandle the handle to configure. +* \param stream the stream to use for next spgpu routines call. Use 0 to reset to the default stream. +*/ +void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream); + +/** +* \fn cudaStram_t spgpuGetStream(spgpuHandle_t pHandle) +* Get the current stream from the handle pHandle. +* \param pHandle the handle from which get the stream. +*/ +cudaStream_t spgpuGetStream(spgpuHandle_t pHandle); + +/** +* \fn size_t spgpuSizeOf(spgpuType_t typeCode) +* Returns the size, in bytes, of the type denoted by typeCode (e.g. 4 for SPGPU_TYPE_FLOAT, 8 for SPGPU_TYPE_DOUBLE). +* \param typeCode outputs the handle +*/ +size_t spgpuSizeOf(spgpuType_t typeCode); + +/* +typedef struct { +spgpuMatrix + +spgpuMatrixType_t MatrixType; +spgpuFillMode_t FillMode; +spgpuDiagType_t DiagType; +int baseIndex; +} spgpuMatrixDesc_t +*/ + +#define cuFloatComplex_isZero(a) (a.x == 0.0f && a.y == 0.0f) +#define cuDoubleComplex_isZero(a) (a.x == 0.0 && a.y == 0.0) +#define cuFloatComplex_isNotZero(a) (a.x != 0.0f || a.y != 0.0f) +#define cuDoubleComplex_isNotZero(a) (a.x != 0.0 || a.y != 0.0) + +#ifdef __cplusplus +} +#endif + +/** @}*/ + diff --git a/cuda/spgpu/debug.h b/cuda/spgpu/debug.h new file mode 100644 index 00000000..d5bc6de2 --- /dev/null +++ b/cuda/spgpu/debug.h @@ -0,0 +1,58 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "stdlib.h" + +#ifndef _WIN32 +#include + +inline +void printTrace (void) +{ + void *array[32]; + size_t size; + char **strings; + size_t i; + + size = backtrace (array, 32); + strings = backtrace_symbols (array, size); + + printf ("---- Obtained %zd stack frames.\n", size); + + for (i = 0; i < size; i++) + printf ("%s\n", strings[i]); + + free (strings); +} +#endif + +inline void __assert(int e, const char* w) +{ + if (!e) + { + printf("%s\n",w); + +#ifndef _WIN32 + printTrace(); +#endif + + exit(0); + } +} + diff --git a/cuda/spgpu/dia.c b/cuda/spgpu/dia.c new file mode 100644 index 00000000..cf761c5c --- /dev/null +++ b/cuda/spgpu/dia.c @@ -0,0 +1,105 @@ +#include "dia.h" +#include "dia_conv.h" +#include "stdlib.h" + +int computeDiaAllocPitch(int rowsCount) +{ + // returns a pitch good for indices and values + return ((rowsCount + 31)/32)*32; +} + +int computeDiaDiagonalsCount( + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices) +{ + int* diagIds = (int*)malloc((rowsCount + columnsCount - 1)*sizeof(int)); + int diagonalsCount = 0; + int i; + + for (i=0; i<(rowsCount + columnsCount - 1); ++i) + diagIds[i] = -1; + + for (i=0; i + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int computeDiaDiagonalsCount( + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices); + + +void coo2dia( + void* values, + int* offsets, + int valuesPitch, + int diagonals, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType); + + +/** +* \fn int computeDiaAllocPitch(int rowsCount) + * This function returns a pitch (in number of elements) that can be used to allocate the values array for DIA matrix format. + * \param rowsCount the rows count + * \return the pitch for an DIA matrix of rowsCount rows. +*/ +int computeDiaAllocPitch(int rowsCount); + + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/ell.c b/cuda/spgpu/ell.c new file mode 100644 index 00000000..33381c28 --- /dev/null +++ b/cuda/spgpu/ell.c @@ -0,0 +1,202 @@ +#include "ell.h" +#include "ell_conv.h" +#include "stdlib.h" + +void computeEllRowLenghts( + int *ellRowLengths, + int *ellMaxRowSize, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + int cooBaseIndex + ) +{ + // find the max number of non zero per row + int maxRowSize = 0; + int i; + for (i=0; i maxRowSize) + maxRowSize = currCount; + } + + *ellMaxRowSize = maxRowSize; +} + +int computeEllAllocPitch(int rowsCount) +{ + // returns a pitch good for indices and values + return ((rowsCount + 31)/32)*32; +} + +void cooToEll( + void *ellValues, + int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int ellMaxRowSize, + int ellBaseIndex, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ) +{ + + size_t elementSize = spgpuSizeOf(valuesType); + + // fill values and indices + int* currentPos = (int*)malloc(rowsCount*sizeof(int)); + int i; + + for (i=0; i dstRs[j]) { + app_dstRs[k] = dstRs[i]; + app_rIdx[k] = rIdx[i]; + + ++k; ++i; + } else { + app_dstRs[k] = dstRs[j]; + app_rIdx[k] = rIdx[j]; + + ++k; ++j; + } + } + + while (i<=center) + { + app_dstRs[k] = dstRs[i]; + app_rIdx[k] = rIdx[i]; + + ++k; ++i; + } + + while (j<=end) + { + app_dstRs[k] = dstRs[j]; + app_rIdx[k] = rIdx[j]; + + ++k; ++j; + } + + for (k=start; k<=end; k++) + { + dstRs[k] = app_dstRs[k-start]; + rIdx[k] = app_rIdx[k-start]; + } +} + +void mergesort(int *dstRs, int *rIdx, int size) { + int* app_dstRs = (int*)malloc(size*sizeof(int)); + int* app_rIdx = (int*)malloc(size*sizeof(int)); + + int sizetomerge=size-1; + size--; + int i; + int n=2; + + while (nsizetomerge) + merge (app_dstRs, app_rIdx, dstRs, rIdx, sizetomerge -((sizetomerge)%n),sizetomerge,size,size); + sizetomerge=sizetomerge-((sizetomerge+1)%n);} + n=n*2; + } + + if (size>sizetomerge) + merge (app_dstRs, app_rIdx, dstRs,rIdx,0,size-(size-sizetomerge),size,size); + + free(app_dstRs); + free(app_rIdx); +} + + + +void ellToOell( + int *rIdx, + void *dstEllValues, + int *dstEllIndices, + int *dstRs, + const void *srcEllValues, + const int *srcEllIndices, + const int *srcRs, + int ellValuesPitch, + int ellIndicesPitch, + int rowsCount, + spgpuType_t valuesType + ) +{ + size_t elementSize = spgpuSizeOf(valuesType); + + int i,j; + for (i=0; i + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void computeEllRowLenghts(int *ellRowLengths, int *ellMaxRowSize, int rowsCount, int nonZerosCount, const int* cooRowIndices, int cooBaseIndex) + * Compute the Ell row lengths array (and the greatest row size) from the COO matrix format. + * \param ellRowLengths Array of length rowsCount to be filled by the non zeros count for every matrix row + * \param ellMaxRowSize outputs the greatest row size (in non zeros) + * \param rowsCount the number of rows of the coo matrix to convert + * \param nonZerosCount the non zeros count of the coo matrix to convert + * \param cooRowIndices the row indices array for the coo matrix to convert + * \param cooBaseIndex the input base index (e.g. 0 for C, 1 for Fortran) + */ +void computeEllRowLenghts( + int *ellRowLengths, + int *ellMaxRowSize, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + int cooBaseIndex + ); + +/** +* \fn int computeEllAllocPitch(int rowsCount) + * This function returns a pitch (in number of elements) that can be used to allocate both indices and values arrays for ELL matrix format. + * \param rowsCount the rows count + * \return the pitch for an ELL matrix of rowsCount rows. +*/ +int computeEllAllocPitch(int rowsCount); + + +/** +* \fn void cooToEll(void *ellValues,int *ellIndices,int ellValuesPitch,int ellIndicesPitch,int ellMaxRowSize,int ellBaseIndex,int rowsCount,int nonZerosCount,const int* cooRowIndices,const int* cooColsIndices,const void* cooValues,int cooBaseIndex, spgpuType_t valuesType) + * Convert a matrix in COO format to a matrix in ELL format. + * The matrix is stored in column-major format. The ellValues and ellIndices sizes are ellMaxRowSize * pitch (pitch is in bytes). + * \param ellValues pointer to the area that will be filled by the non zero coefficients + * \param ellIndices pointer to the area that will be filled by the non zero indices + * \param ellValuesPitch the column-major allocation's pitch of ellValues (in number of elements) + * \param ellIndicesPitch the column-major allocation's pitch of ellIndices (in number of elements) + * \param ellMaxRowSize the greatest row size + * \param ellBaseIndex the desired base index for the ELL matrix (e.g. 0 for C, 1 for Fortran) + * \param rowsCount input matrix rows count + * \param nonZerosCount input matrix non zeros count + * \param cooRowIndices input matrix row indices pointer + * \param cooColsIndices input matrix column indices pointer + * \param cooValues input matrix non zeros values pointer + * \param cooBaseIndex input matrix base index + * \param valuesType the type for elements in ellValues and cooValues (i.e. SPGPU_TYPE_FLOAT or SPGPU_TYPE_DOUBLE) + */ +void cooToEll( + void *ellValues, + int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int ellMaxRowSize, + int ellBaseIndex, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ); + +void ellToOell( + int *rIdx, + void *dstEllValues, + int *dstEllIndices, + int *dstRs, + const void *srcEllValues, + const int *srcEllIndices, + const int *srcRs, + int ellValuesPitch, + int ellIndicesPitch, + int rowsCount, + spgpuType_t valuesType + ); + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/hdia.cpp b/cuda/spgpu/hdia.cpp new file mode 100644 index 00000000..daf74173 --- /dev/null +++ b/cuda/spgpu/hdia.cpp @@ -0,0 +1,374 @@ +#include "hdia_conv.h" +#include "stdlib.h" +#include "string.h" + +#include +#include + +int getHdiaHacksCount(int hackSize, int rowsCount) +{ + return (rowsCount + hackSize - 1)/hackSize; +} + +void computeHdiaHackOffsets( + int *allocationHeight, + int *hackOffsets, + int hackSize, + const void* diaValues, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType + ) +{ + int i,r,s, hack; + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + size_t elementSize = spgpuSizeOf(valuesType); + + int hackHeight = 0; + + hackOffsets[0] = 0; + for (hack=0; hack= rowsCount) + break; + + const char* val = (char*)diaValues + elementSize*(row + i*diaValuesPitch); + + for (s=0; s= rowsCount) + break; + + const char* val = (const char*)diaValues + elementSize*(row + i*diaValuesPitch); + + for (s=0; s= rowsCount) + break; + + char* dest = (char*)hdiaValues + elementSize*((posOffset + i)*hackSize + r); + const char* src = (const char*)diaValues + elementSize*(row + diagPosInsideDia*diaValuesPitch); + + memcpy(dest, src, elementSize); + } + } + } +} + + + + + + + +void computeHdiaHackOffsetsFromCoo( + int *allocationHeight, + int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + int cooBaseIndex + ) +{ + + int i,j,h; + + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + + // Find rows per hack + std::vector *rowsPerHack = new std::vector [hackCount]; + + for (i=0; i diagIdsToPos; + + hackOffsets[0] = 0; + for (h=0; h *hackRows = &rowsPerHack[h]; + int hackRowsSize = hackRows->size(); + + for (j=0; jat(j); + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int diagId = (colIdx-cooBaseIndex) - ((rowIdx-cooBaseIndex) % hackSize); + int diagPos = hackSize - 1 + diagId; + + std::map::iterator it = diagIdsToPos.find(diagPos); + + if(it == diagIdsToPos.end()) + { + diagIdsToPos[diagPos] = 1; + ++diagonalsCount; + } + } + + hackOffsets[h+1] = hackOffsets[h] + diagonalsCount; + } + + *allocationHeight = hackOffsets[hackCount]; + + delete[] rowsPerHack; +} + +void cooToHdia_size( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + size_t elementSize + ) +{ + int i,j,h; + + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + // Find rows per hack + std::vector *rowsPerHack = new std::vector [hackCount]; + + for (i=0; i hackDiagIdsToPos; + + for (h=0; h *hackRows = &rowsPerHack[h]; + int hackRowsSize = hackRows->size(); + + for (j=0; jat(j); + + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int globalDiagId = colIdx - rowIdx; + int diagId = (colIdx - cooBaseIndex) - ((rowIdx - cooBaseIndex) % hackSize); + int diagPos = hackSize - 1 + diagId; + + std::map::iterator it = hackDiagIdsToPos.find(diagPos); + + if(it == hackDiagIdsToPos.end()) + { + hackDiagIdsToPos[diagPos] = globalDiagId; + } + } + + // Reorder diags + for (std::map::iterator it = hackDiagIdsToPos.begin(); it != hackDiagIdsToPos.end(); ++it) + { + int i = it->first; + + int globalDiagId = it->second; + int diagPosInsideOffsets; + int diagId = i - hackSize + 1; + hackDiagIdsToPos[i] = diagPosInsideOffsets = diagonalsCount++; + hdiaOffsets[diagPosInsideOffsets] = globalDiagId; + } + + + hdiaOffsets += diagonalsCount; + + for (j=0; jat(j); + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int diagId = (colIdx - cooBaseIndex) - ((rowIdx - cooBaseIndex) % hackSize); + + int diagPosInsideOffsets = hackDiagIdsToPos[hackSize - 1 + diagId]; + + char* valAddr = (char*)hdiaValues + + elementSize*(((rowIdx - cooBaseIndex) % hackSize) + + hackSize* (hackOffsets[h] + diagPosInsideOffsets)); + + memcpy(valAddr, (const char*)cooValues + i*elementSize, elementSize); + } + } + + delete[] rowsPerHack; +} + + + +void cooToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ) +{ + size_t elementSize = spgpuSizeOf(valuesType); + + cooToHdia_size(hdiaValues, hdiaOffsets, + hackOffsets, hackSize, rowsCount, + columnsCount, nonZerosCount, + cooRowIndices, cooColsIndices, cooValues, cooBaseIndex, elementSize); +} + +void bcooToBhdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType, + int blockSize + ) +{ + size_t elementSize = blockSize*spgpuSizeOf(valuesType); + + cooToHdia_size(hdiaValues, hdiaOffsets, + hackOffsets, hackSize, rowsCount, + columnsCount, nonZerosCount, + cooRowIndices, cooColsIndices, cooValues, cooBaseIndex, elementSize); +} + diff --git a/cuda/spgpu/hdia.h b/cuda/spgpu/hdia.h new file mode 100644 index 00000000..e8808fb7 --- /dev/null +++ b/cuda/spgpu/hdia.h @@ -0,0 +1,159 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2013 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" + + +/** \addtogroup diaFun DIA/HDIA Format + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +/** +* \fn spgpuShdiaspmv (spgpuHandle_t handle, float* z, const float *y, float alpha, const float* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const float *x, float beta) + * Computes single precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuShdiaspmv (spgpuHandle_t handle, + float* z, + const float *y, + float alpha, + const float* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const float *x, + float beta); + + +/** +* \fn spgpuDhdiaspmv (spgpuHandle_t handle, double* z, const double *y, double alpha, const double* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const double *x, double beta) + * Computes double precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuDhdiaspmv (spgpuHandle_t handle, + double* z, + const double *y, + double alpha, + const double* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const double *x, + double beta); + + +/** +* \fn spgpuChdiaspmv (spgpuHandle_t handle, cuFloatComplex* z, const cuFloatComplex *y, cuFloatComplex alpha, const cuFloatComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuFloatComplex *x, cuFloatComplex beta) + * Computes single precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuChdiaspmv (spgpuHandle_t handle, + cuFloatComplex* z, + const cuFloatComplex *y, + cuFloatComplex alpha, + const cuFloatComplex* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const cuFloatComplex *x, + cuFloatComplex beta); + + +/** +* \fn spgpuZhdiaspmv (spgpuHandle_t handle, cuDoubleComplex* z, const cuDoubleComplex *y, cuDoubleComplex alpha, const cuDoubleComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuDoubleComplex *x, cuDoubleComplex beta) + * Computes double precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuZhdiaspmv (spgpuHandle_t handle, + cuDoubleComplex* z, + const cuDoubleComplex *y, + cuDoubleComplex alpha, + const cuDoubleComplex* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const cuDoubleComplex *x, + cuDoubleComplex beta); + +/** @}*/ + +#ifdef __cplusplus +} +#endif + diff --git a/cuda/spgpu/hdia_conv.h b/cuda/spgpu/hdia_conv.h new file mode 100644 index 00000000..c22bf990 --- /dev/null +++ b/cuda/spgpu/hdia_conv.h @@ -0,0 +1,102 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2013 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" + + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int getHdiaHacksCount(int hackSize, int rowsCount); + +void computeHdiaHackOffsets( + int *allocationHeight, + int *hackOffsets, + int hackSize, + const void* diaValues, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType); + +void diaToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + const void* diaValues, + const int* diaOffsets, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType + ); + +void computeHdiaHackOffsetsFromCoo( + int *allocationHeight, + int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + int cooBaseIndex + ); + +void cooToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ); + +void bcooToBhdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType, + int blockSize); + +#ifdef __cplusplus +} +#endif + + +/** @}*/ diff --git a/cuda/spgpu/hell.c b/cuda/spgpu/hell.c new file mode 100644 index 00000000..07764ee3 --- /dev/null +++ b/cuda/spgpu/hell.c @@ -0,0 +1,104 @@ +#include "hell.h" +#include "hell_conv.h" + +void computeHellAllocSize( + int* allocationHeight, + int hackSize, + int rowsCount, + const int *ellRowLengths + ) +{ + int totalLen = 0; + int i; + int remainings; + int done; + int maxLen; + + for (i=0; i maxLen) + maxLen = curLen; + } + totalLen += maxLen; + } + + remainings = rowsCount % hackSize; + done = (rowsCount/hackSize)*hackSize; + maxLen = 0; + + for (i=0; i maxLen) + maxLen = curLen; + } + + *allocationHeight = totalLen + maxLen; +} + +void ellToHell( + void *hellValues, + int *hellIndices, + int* hackOffsets, + int hackSize, + + const void *ellValues, + const int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int *ellRowLengths, + int rowsCount, + spgpuType_t valuesType + ) +{ + + size_t elementSize = spgpuSizeOf(valuesType); + + int hacks = (rowsCount + hackSize - 1)/hackSize; + + char* currValPos = (char*)hellValues; + int* currIndPos = hellIndices; + + int hackOffset = 0; + int i; + for (i=0; i= rowsCount) + break; + + rowLen = ellRowLengths[row]; + + if (rowLen > maxLen) + maxLen = rowLen; + + for (k=0; k + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void computeHellAllocSize(int* allocationHeight, int hackSize, int rowsCount, const int *ellRowLengths) + * Compute the HELL format allocation's height for the Hell allocation + * (the resulting size should be allocationHeight*hackSize*sizeof(elementsType)). + * \param allocationHeight outputs the total allocation's height + * \param hackSize the hack size for this matrix (32 or 64 are good choices) + * \param rowsCount the rows count + * \param ellRowLengths the row lengths array from the ell matrix to convert +*/ +void computeHellAllocSize( + int* allocationHeight, + int hackSize, + int rowsCount, + const int *ellRowLengths + ); + +/** +* \fn void ellToHell(void *hellValues, int *hellIndices, int* hackOffsets, int hackSize, const void *ellValues, const int *ellIndices, int ellValuesPitch, int ellIndicesPitch, int *ellRowLengths, int rowsCount, spgpuType_t valuesType) + * Convert a matrix from the ELL format to the HELL format. + * \param hellValues pointer to the area that will be filled by the non zero coefficients + * \param hellIndices pointer to the area that will be filled by the non zero indices + * \param hackOffsets + * \param hackSize the hack size used to allocate hellValues and hellIndices (32 or 64 are good choices) + * \param ellValues the input matrix coefficients + * \param ellIndices the input matrix indices + * \param ellValuesPitch the input values allocation pitch (in number of elements) + * \param ellIndicesPitch the input indices allocation pitch (in number of elements) + * \param ellRowLengths the row lengths array of the input matrix + * \param rowsCount the rows count + * \param valuesType the type of hellValues and ellValues elements (i.e. SPGPU_TYPE_FLOAT or SPGPU_TYPE_DOUBLE) +*/ +void ellToHell( + void *hellValues, + int *hellIndices, + int* hackOffsets, + int hackSize, + const void *ellValues, + const int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int *ellRowLengths, + int rowsCount, + spgpuType_t valuesType + ); + +#ifdef __cplusplus +} +#endif + + +/** @}*/ diff --git a/cuda/spgpu/kernels/Makefile b/cuda/spgpu/kernels/Makefile new file mode 100644 index 00000000..85b97aa8 --- /dev/null +++ b/cuda/spgpu/kernels/Makefile @@ -0,0 +1,33 @@ +TOP=../../.. +include $(TOP)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOP)/lib +INCDIR=$(TOP)/include +MODDIR=$(TOP)/modules +UP=.. +LIBNAME=$(UP)/libspgpu.a +CINCLUDES=-I$(INCDIR) + +OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \ + cnrm2.o cscal.o cscat.o csetscal.o \ + dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o \ + dia_cspmv.o dia_dspmv.o dia_sspmv.o dia_zspmv.o dnrm2.o \ + dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o \ + ell_dcsput.o ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \ + hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \ + hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \ + saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o \ + zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o + +objs: $(OBJS) +lib: objs + ar cur $(UP)/$(LIBNAME) $(OBJS) + + +clean: + /bin/rm -fr $(OBJS) +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) $(CUDA_INCLUDES) -c $< + diff --git a/cuda/spgpu/kernels/abs_base.cuh b/cuda/spgpu/kernels/abs_base.cuh new file mode 100644 index 00000000..d48f815d --- /dev/null +++ b/cuda/spgpu/kernels/abs_base.cuh @@ -0,0 +1,110 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),abs) + +#define BLOCK_SIZE 256 + +// Define: +//#define RES_VALUE_TYPE +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__device__ __host__ static inline bool is_one_float(float x) { return (x==1.0f); } +__device__ __host__ static inline bool is_one_cuFloatComplex(cuFloatComplex x) { return ((x.x==1.0f)&&(x.y==0.0f));} + + +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static inline bool is_one_double(double x) { return (x==1.0); } +__device__ __host__ static inline bool is_one_cuDoubleComplex(cuDoubleComplex x) { return ((x.x==1.0)&&(x.y==0.0));} +#endif + + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern_alpha) + (RES_VALUE_TYPE *y, int n, RES_VALUE_TYPE alpha, VALUE_TYPE* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since y, and x are accessed with the same offset by the same thread, + // and the write to y follows the read of x, then x could be y. + + y[id] = CONCAT(RES_VALUE_TYPE, _mul)(alpha, CONCAT(VALUE_TYPE, _abs)(x[id])); + } +} + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (RES_VALUE_TYPE *y, int n, VALUE_TYPE* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since y, and x are accessed with the same offset by the same thread, + // and the write to y follows the read of x, then x could be y. + + y[id] = CONCAT(VALUE_TYPE, _abs)(x[id]); + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, RES_VALUE_TYPE *y, int n, RES_VALUE_TYPE alpha, VALUE_TYPE* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + + if (CONCAT(is_one_,RES_VALUE_TYPE)(alpha)) + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(y, n, x); + else + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern_alpha)<<currentStream>>>(y, n, alpha, x); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device RES_VALUE_TYPE *y, + int n, + RES_VALUE_TYPE alpha, + __device VALUE_TYPE *x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, maxNForACall, alpha, x); + x = x + maxNForACall; + y = y + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) (handle, y, n, alpha, x); + cudaCheckError("CUDA error on abs"); +} diff --git a/cuda/spgpu/kernels/amax_base.cuh b/cuda/spgpu/kernels/amax_base.cuh new file mode 100644 index 00000000..4d7e7d4a --- /dev/null +++ b/cuda/spgpu/kernels/amax_base.cuh @@ -0,0 +1,233 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_AMAX_NAME +#define GEN_SPGPU_AMAX_NAME(x) CONCAT(CONCAT(spgpu,x),amax) + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#define BLOCK_SIZE 512 + +typedef float absType_float; +typedef float absType_cuFloatComplex; +typedef double absType_double; +typedef double absType_cuDoubleComplex; + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ __host__ static float abs_float(float a) { return fabsf(a); } +__device__ __host__ static float abs_cuFloatComplex(cuFloatComplex a) { return cuCabsf(a); } + + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ __host__ static double abs_double(float a) { return fabs(a); } +__device__ __host__ static double abs_cuDoubleComplex(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +static __device__ CONCAT(absType_,VALUE_TYPE) CONCAT(TYPE_SYMBOL,amaxReductionResult)[128]; + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxvv(VALUE_TYPE a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absa = CONCAT(abs_,VALUE_TYPE)(a); + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return MAX(absa,absb); +} + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxaa(CONCAT(absType_,VALUE_TYPE) a, CONCAT(absType_,VALUE_TYPE) b) +{ + return MAX(a,b); +} + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxav(CONCAT(absType_,VALUE_TYPE) a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return MAX(a,absb); +} + +__global__ void +CONCAT(GEN_SPGPU_AMAX_NAME(TYPE_SYMBOL),_kern) +(int n, VALUE_TYPE* x) +{ + __shared__ CONCAT(absType_,VALUE_TYPE) sSum[BLOCK_SIZE]; + + CONCAT(absType_,VALUE_TYPE) res = 0; + + VALUE_TYPE* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + VALUE_TYPE x1 = x[0]; + res = amaxav(res, x1); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + CONCAT(absType_,VALUE_TYPE) tRes[128]; + + CONCAT(GEN_SPGPU_AMAX_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, CONCAT(TYPE_SYMBOL,amaxReductionResult), blocks*sizeof(CONCAT(absType_,VALUE_TYPE))); + + for (int i=0; i= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ __host__ static double abs_double(float a) { return fabs(a); } +__device__ __host__ static double abs_cuDoubleComplex(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ __host__ static double double_add(double a, double b) { +#ifndef __CUDA__ARCH__ + return a + b; +#else + return PREC_DADD (a, b); +#endif +} +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +static __device__ CONCAT(absType_,VALUE_TYPE) CONCAT(TYPE_SYMBOL,asumReductionResult)[128]; + + +static __device__ CONCAT(absType_,VALUE_TYPE) asumvv(VALUE_TYPE a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absa = CONCAT(abs_,VALUE_TYPE)(a); + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return absa + absb; +} + +static __device__ __host__ CONCAT(absType_,VALUE_TYPE) asumaa(CONCAT(absType_,VALUE_TYPE) a, CONCAT(absType_,VALUE_TYPE) b) +{ + return a + b; +} + +static __device__ CONCAT(absType_,VALUE_TYPE) asumav(CONCAT(absType_,VALUE_TYPE) a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return a + absb; +} + +__global__ void +CONCAT(GEN_SPGPU_ASUM_NAME(TYPE_SYMBOL),_kern) +(int n, VALUE_TYPE* x) +{ + __shared__ CONCAT(absType_,VALUE_TYPE) sSum[BLOCK_SIZE]; + + CONCAT(absType_,VALUE_TYPE) res = 0; + + VALUE_TYPE* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + VALUE_TYPE x1 = x[0]; + res = asumav(res, x1); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + CONCAT(absType_,VALUE_TYPE) tRes[128]; + + CONCAT(GEN_SPGPU_ASUM_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, CONCAT(TYPE_SYMBOL,asumReductionResult), blocks*sizeof(CONCAT(absType_,VALUE_TYPE))); + + for (int i=0; icurrentStream>>>(z, n, alpha, x, y); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *z, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE *x, + __device VALUE_TYPE *y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, z, maxNForACall, alpha, x, y); + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) (handle, z, n, alpha, x, y); + cudaCheckError("CUDA error on axy"); +} + +void +GEN_SPGPU_MFUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *z, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y, + int count, + int pitch) +{ + for (int i=0; icurrentStream>>>(w, n, beta, z, alpha, x, y); + +} + +void +GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *w, + int n, + VALUE_TYPE beta, + __device VALUE_TYPE *z, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y + ) +{ + + if (CONCAT(VALUE_TYPE, _isZero(alpha))) + { + GEN_SPGPU_SCAL_NAME(TYPE_SYMBOL) + (handle, w, n, beta, z); + } + else if (CONCAT(VALUE_TYPE, _isZero(beta))) { + GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (handle, w, n, alpha, x, y); + } + else { + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + + CONCAT(GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL),_) + (handle, w, maxNForACall, beta, z, alpha, x, y); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + w = w + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL),_) + (handle, w, n, beta, z, alpha, x, y); + } + + cudaCheckError("CUDA error on axypbz"); +} + +void +GEN_SPGPU_MFUNC_NAME_2(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *w, + int n, + VALUE_TYPE beta, + __device VALUE_TYPE *z, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y, + int count, + int pitch) +{ + for (int i=0; icurrentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuCaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + spgpuCaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuCaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on saxpby"); +} + +void spgpuCmaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + + cuFloatComplex tRes[128]; + + spgpuCdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, sdotReductionResult, blocks*sizeof(cuFloatComplex)); + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + + spgpuCnrm2_kern<<currentStream>>>(n, x); + cudaMemcpyFromSymbol(tRes, snrm2ReductionResult,blocks*sizeof(float)); + + for (int i=0; i= 200 +#define PREC_FADD(a,b) ((a) + (b)) +#define PREC_FMUL(a,b) ((a) * (b)) +#else +#define PREC_FADD(a,b) __fadd_rn((a),(b)) +#define PREC_FMUL(a,b) __fmul_rn((a),(b)) +#endif + +#define PREC_DADD(a,b) ((a) + (b)) +#define PREC_DMUL(a,b) ((a) * (b)) + + +inline __host__ __device__ double2 make_double2(double s) +{ + return make_double2(s, s); +} + +inline __host__ __device__ double2 operator+(double2 a, double2 b) +{ + return make_double2(a.x + b.x, a.y + b.y); +} + +inline __host__ __device__ void operator+=(double2 &a, double2 b) +{ + a.x += b.x; a.y += b.y; +} + +inline __host__ __device__ double2 operator-(double2 a, double2 b) +{ + return make_double2(a.x - b.x, a.y - b.y); +} + +inline __host__ __device__ void operator-=(double2 &a, double2 b) +{ + a.x -= b.x; a.y -= b.y; +} + +inline __host__ __device__ double2 operator*(double2 a, double s) +{ + return make_double2(a.x * s, a.y * s); +} + +inline __host__ __device__ double2 operator*(double s, double2 a) +{ + return make_double2(a.x * s, a.y * s); +} + +inline __host__ __device__ void operator*=(double2 &a, double s) +{ + a.x *= s; a.y *= s; +} diff --git a/cuda/spgpu/kernels/dabs.cu b/cuda/spgpu/kernels/dabs.cu new file mode 100644 index 00000000..7fb9c074 --- /dev/null +++ b/cuda/spgpu/kernels/dabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define RES_VALUE_TYPE double +#define TYPE_SYMBOL D +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/damax.cu b/cuda/spgpu/kernels/damax.cu new file mode 100644 index 00000000..1e78cc1f --- /dev/null +++ b/cuda/spgpu/kernels/damax.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "amax_base.cuh" + diff --git a/cuda/spgpu/kernels/dasum.cu b/cuda/spgpu/kernels/dasum.cu new file mode 100644 index 00000000..71a37e80 --- /dev/null +++ b/cuda/spgpu/kernels/dasum.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "asum_base.cuh" + diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu new file mode 100644 index 00000000..83724ce2 --- /dev/null +++ b/cuda/spgpu/kernels/daxpby.cu @@ -0,0 +1,101 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0) + z[id] = PREC_DMUL(alpha,x[id]); + else + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } +} + + +void spgpuDaxpby_(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuDaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuDaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuDaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on daxpby"); +} + +void spgpuDmaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + double tRes[128]; + + spgpuDdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, ddotReductionResult,blocks*sizeof(double)); + + for (int i=0; i= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif + +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_vanilla) +#define GEN_SPGPU_DIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),diaspmv_vanilla) +#include "dia_spmv_base_template.cuh" +#if 0 +#undef GEN_SPGPU_DIA_NAME +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_prefetch) +#define GEN_SPGPU_DIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),diaspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "dia_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef ENABLE_CACHE +#undef GEN_SPGPU_DIA_NAME +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache_prefetch) +#define GEN_SPGPU_DIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache_prefetch) +#include "dia_spmv_base_template.cuh" +#undef GEN_SPGPU_DIA_NAME +#undef USE_PREFETCHING +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache) +#define GEN_SPGPU_DIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache) +#include "dia_spmv_base_template.cuh" +#endif + +#undef GEN_SPGPU_DIA_NAME +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv) +void +GEN_SPGPU_DIA_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* dM, + const int* offsets, + int dMPitch, + int rows, + int cols, + int diags, + const VALUE_TYPE *x, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (rows > maxNForACall) //managing large vectors + { + //if (diags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); + //else + CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); + #if 0 + if (diags < 20) + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); + else + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); +#endif + y = y + maxNForACall; + z = z + maxNForACall; + dM = dM + maxNForACall; + + rows -= maxNForACall; + } + CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +#if 0 + //if (diags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + //else + if (diags < 20) + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + else + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +#endif + cudaCheckError("CUDA error on dia_spmv"); +} + diff --git a/cuda/spgpu/kernels/dia_spmv_base_template.cuh b/cuda/spgpu/kernels/dia_spmv_base_template.cuh new file mode 100644 index 00000000..792fa7ed --- /dev/null +++ b/cuda/spgpu/kernels/dia_spmv_base_template.cuh @@ -0,0 +1,217 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x, VALUE_TYPE beta) +{ + int i = threadIdx.x + blockIdx.x * (blockDim.x); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows && CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + dM += i; + + extern __shared__ int offsetsChunk[]; + + int rounds = (diags + blockDim.x - 1)/blockDim.x; + + for (int r = 0; r < rounds; r++) + { + // in the last round diags will be <= blockDim.x + if (threadIdx.x < diags) + offsetsChunk[threadIdx.x] = offsets[threadIdx.x]; + + __syncthreads(); + + if (i < rows) + { + int count = min(diags, blockDim.x ); + + +#ifdef USE_PREFETCHING + int j; + for (j=0; j<=count-3; j += 3) + { + // Prefetch 3 values + int column1 = offsetsChunk[j] + i; + int column2 = offsetsChunk[j+1] + i; + int column3 = offsetsChunk[j+2] + i; + + bool inside1 = column1 >= 0 && column1 < cols; + bool inside2 = column2 >= 0 && column2 < cols; + bool inside3 = column3 >= 0 && column3 < cols; + + // Anticipate global memory read + + VALUE_TYPE xValue1, xValue2, xValue3; + VALUE_TYPE mValue1, mValue2, mValue3; + + if(inside1) + { + mValue1 = dM[0]; +#ifdef ENABLE_CACHE + xValue1 = fetchTex (column1); +#else + xValue1 = x[column1]; +#endif + } + dM += dMPitch; + + if(inside2) + { + mValue2 = dM[0]; +#ifdef ENABLE_CACHE + xValue2 = fetchTex (column2); +#else + xValue2 = x[column2]; +#endif + } + dM += dMPitch; + + if(inside3) + { + mValue3 = dM[0]; +#ifdef ENABLE_CACHE + xValue3 = fetchTex (column3); +#else + xValue3 = x[column3]; +#endif + } + dM += dMPitch; + + if(inside1) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue1, xValue1, zProd); + if(inside2) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue2, xValue2, zProd); + if(inside3) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue3, xValue3, zProd); + } + + for (;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + + } + + dM += dMPitch; + } +#else + for (int j=0; j= 0 && column < cols) + { + + VALUE_TYPE xValue; + +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += dMPitch; + } +#endif + } + + diags -= blockDim.x; + offsets += blockDim.x; + __syncthreads(); + } + + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (i >= rows) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[i] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[i] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x) +{ + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, CONCAT(zero_,VALUE_TYPE)()); +} + +__global__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x, VALUE_TYPE beta) +{ + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +} + +void +CONCAT(_,GEN_SPGPU_DIA_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, + const VALUE_TYPE *x, VALUE_TYPE beta) +{ + dim3 block (THREAD_BLOCK ); + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK ); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + else + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x); + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/dia_sspmv.cu b/cuda/spgpu/kernels/dia_sspmv.cu new file mode 100644 index 00000000..2870d780 --- /dev/null +++ b/cuda/spgpu/kernels/dia_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "dia.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "dia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/dia_zspmv.cu b/cuda/spgpu/kernels/dia_zspmv.cu new file mode 100644 index 00000000..7e2fe68f --- /dev/null +++ b/cuda/spgpu/kernels/dia_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "dia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "dia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/dnrm2.cu b/cuda/spgpu/kernels/dnrm2.cu new file mode 100644 index 00000000..450882c4 --- /dev/null +++ b/cuda/spgpu/kernels/dnrm2.cu @@ -0,0 +1,157 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + + +//#define USE_CUBLAS + +//#define ASSUME_LOCK_SYNC_PARALLELISM + + +#define BLOCK_SIZE 512 + +static __device__ double dnrm2ReductionResult[128]; + +__global__ void spgpuDnrm2_kern(int n, double* x) +{ + __shared__ double sSum[BLOCK_SIZE]; + + double res = 0; + + double* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + double x1 = x[0]; + res = PREC_DADD(res, PREC_DMUL(x1, x1)); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + double tRes[128]; + + spgpuDnrm2_kern<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, dnrm2ReductionResult,blocks*sizeof(double)); + + for (int i=0; icurrentStream >>> (alpha, cM, rP, cMPitch, rPPitch, rS, nnz, aI, aJ, aVal, baseIndex); +} + +void +GEN_SPGPU_ELL_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + VALUE_TYPE alpha, + VALUE_TYPE* cM, + const int* rP, + int cMPitch, + int rPPitch, + const int* rS, + int nnz, + int *aI, + int *aJ, + VALUE_TYPE *aVal, + int baseIndex) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (nnz > maxNForACall) //managing large vectors + { + CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) + (handle, alpha, cM, rP, cMPitch, rPPitch, rS, maxNForACall, aI, aJ, aVal, baseIndex); + + aI = aI + maxNForACall; + aJ = aJ + maxNForACall; + aVal = aVal + maxNForACall; + + nnz -= maxNForACall; + } + + CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) + (handle, alpha, cM, rP, cMPitch, rPPitch, rS, nnz, aI, aJ, aVal, baseIndex); + + cudaCheckError("CUDA error on ell_csput"); +} + diff --git a/cuda/spgpu/kernels/ell_dcsput.cu b/cuda/spgpu/kernels/ell_dcsput.cu new file mode 100644 index 00000000..00e60565 --- /dev/null +++ b/cuda/spgpu/kernels/ell_dcsput.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_dspmv.cu b/cuda/spgpu/kernels/ell_dspmv.cu new file mode 100644 index 00000000..4e682772 --- /dev/null +++ b/cuda/spgpu/kernels/ell_dspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#define TEX_FETCH_TYPE int2 +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_scsput.cu b/cuda/spgpu/kernels/ell_scsput.cu new file mode 100644 index 00000000..eeb5c6bb --- /dev/null +++ b/cuda/spgpu/kernels/ell_scsput.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_spmv_base.cuh b/cuda/spgpu/kernels/ell_spmv_base.cuh new file mode 100644 index 00000000..e2af5896 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base.cuh @@ -0,0 +1,154 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_ELL_NAME +#undef X_TEX +#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_vanilla) +#define GEN_SPGPU_ELL_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),ellspmv_vanilla) +#include "ell_spmv_base_template.cuh" +#if 0 +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_prefetch) +#define GEN_SPGPU_ELL_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),ellspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "ell_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache_prefetch) +#define GEN_SPGPU_ELL_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache_prefetch) +#include "ell_spmv_base_template.cuh" +#undef GEN_SPGPU_ELL_NAME +#undef USE_PREFETCHING +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache) +#define GEN_SPGPU_ELL_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache) +#include "ell_spmv_base_template.cuh" +#endif +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv) +void +GEN_SPGPU_ELL_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* cM, + const int* rP, + int cMPitch, + int rPPitch, + const int* rS, + const __device int* rIdx, + int avgNnzPerRow, + int maxNnzPerRow, + int rows, + const VALUE_TYPE *x, + VALUE_TYPE beta, + int baseIndex) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (rows > maxNForACall) //managing large vectors + { +#if 0 + if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) +#endif + CONCAT(_,GEN_SPGPU_ELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); +#if 0 + else if (avgNnzPerRow < 20) + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); + else + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); +#endif + y = y + maxNForACall; + z = z + maxNForACall; + cM = cM + maxNForACall; + rP = rP + maxNForACall; + rS = rS + maxNForACall; + + rows -= maxNForACall; + } +#if 0 + if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) +#endif + CONCAT(_,GEN_SPGPU_ELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); + #if 0 + else if (avgNnzPerRow < 20) + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); + else + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); +#endif + cudaCheckError("CUDA error on ell_spmv"); +} + diff --git a/cuda/spgpu/kernels/ell_spmv_base_nors.cuh b/cuda/spgpu/kernels/ell_spmv_base_nors.cuh new file mode 100644 index 00000000..8cd89704 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base_nors.cuh @@ -0,0 +1,340 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int rows, int maxNnzPerRow, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[2][THREAD_BLOCK+1]; + + if (i < rows) + { + rP += i; cM += i; + + int rowSizeM = maxNnzPerRow / 4; + + + if ((maxNnzPerRow % 4) > threadIdx.y) + ++rowSizeM; + + rP += threadIdx.y*rPPitch; + cM += threadIdx.y*cMPitch; + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += 4*rPPitch; + + value = cM[0]; + cM += 4*cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y > 1) + temp[threadIdx.y - 2][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y <= 1) + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.y][threadIdx.x]); + + if (threadIdx.y == 1) + temp[1][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[1][threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, const int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) + { + rP += i; cM += i; + + int rowSizeM = maxNnzPerRow / 2; + + if (threadIdx.y == 0) + { + if (maxNnzPerRow % 2) + ++rowSizeM; + } + else + { + rP += rPPitch; + cM += cMPitch; + } + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + rP += i; cM += i; + +#ifdef USE_PREFETCHING + for (int j = 0; j < maxNnzPerRow / 2; j++) + { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += rPPitch; + pointers2 = rP[0] - baseIndex; + rP += rPPitch; + + values1 = cM[0]; + cM += cMPitch; + + values2 = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (maxNnzPerRow % 2) + { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < maxNnzPerRow; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rIdx, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) + { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + */ +} + + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + */ + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) + (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) + (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); +} diff --git a/cuda/spgpu/kernels/ell_spmv_base_template.cuh b/cuda/spgpu/kernels/ell_spmv_base_template.cuh new file mode 100644 index 00000000..fa39d8a6 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base_template.cuh @@ -0,0 +1,426 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define THREAD_BLOCK 128 + +#include "ell_spmv_base_nors.cuh" + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[2][THREAD_BLOCK+1]; + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + int rowSizeM = rowSize / 4; + + + if ((rowSize % 4) > threadIdx.y) + ++rowSizeM; + + rP += threadIdx.y*rPPitch; + cM += threadIdx.y*cMPitch; + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += 4*rPPitch; + + value = cM[0]; + cM += 4*cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y > 1) + temp[threadIdx.y - 2][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y <= 1) + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.y][threadIdx.x]); + + if (threadIdx.y == 1) + temp[1][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[1][threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + int rowSizeM = rowSize / 2; + + if (threadIdx.y == 0) + { + if (rowSize % 2) + ++rowSizeM; + } + else + { + rP += rPPitch; + cM += cMPitch; + } + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + +#ifdef USE_PREFETCHING + for (int j = 0; j < rowSize / 2; j++) + { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += rPPitch; + pointers2 = rP[0] - baseIndex; + rP += rPPitch; + + values1 = cM[0]; + cM += cMPitch; + + values2 = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (rowSize % 2) + { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < rowSize; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, const int* rIdx, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) + { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + */ +} + + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + */ + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); +} + +void +CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, + const __device int* rIdx, int avgNnzPerRow, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int avgThreshold; + + if (handle->capabilityMajor < 2) + avgThreshold = 8; + else if (handle->capabilityMajor < 3) + avgThreshold = 16; + else + avgThreshold = 32; + +#if defined(ELL_FORCE_THREADS_1) + dim3 block (THREAD_BLOCK, 1); +#elif defined(ELL_FORCE_THREADS_2) + dim3 block (THREAD_BLOCK, 2); +#else + dim3 block (THREAD_BLOCK, avgNnzPerRow >= avgThreshold ? 2 : 1); +#endif + + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + if (rIdx) + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs), cudaFuncCachePreferL1); + + if (rS) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rIdx, maxNnzPerRow, rows, x, beta, baseIndex); + } + else + { + + + if (rS) + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, baseIndex); + } + else + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, baseIndex); + } + } + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/ell_sspmv.cu b/cuda/spgpu/kernels/ell_sspmv.cu new file mode 100644 index 00000000..e64e03e8 --- /dev/null +++ b/cuda/spgpu/kernels/ell_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_zcsput.cu b/cuda/spgpu/kernels/ell_zcsput.cu new file mode 100644 index 00000000..3d0f3293 --- /dev/null +++ b/cuda/spgpu/kernels/ell_zcsput.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_zspmv.cu b/cuda/spgpu/kernels/ell_zspmv.cu new file mode 100644 index 00000000..a9c0fe64 --- /dev/null +++ b/cuda/spgpu/kernels/ell_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/gath_base.cuh b/cuda/spgpu/kernels/gath_base.cuh new file mode 100644 index 00000000..a0e77b44 --- /dev/null +++ b/cuda/spgpu/kernels/gath_base.cuh @@ -0,0 +1,86 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),gath) + +#define BLOCK_SIZE 256 + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (VALUE_TYPE* values, int count, const int* indices, int firstIndex, const VALUE_TYPE* vector) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < count) + { + int pos = indices[id]-firstIndex; + + if (pos < 0) + return; + + values[id] = vector[pos]; + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, VALUE_TYPE *xValues, int xNnz, + const __device int *xIndices, int xBaseIndex, const VALUE_TYPE* y) +{ + int msize = (xNnz+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(xValues, xNnz, xIndices, xBaseIndex, y); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device VALUE_TYPE* y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (xNnz > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, xValues, maxNForACall, xIndices, xBaseIndex, y); + xIndices += maxNForACall; + xValues += maxNForACall; + xNnz -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, xValues, xNnz, xIndices, xBaseIndex, y); + cudaCheckError("CUDA error on gath"); +} + diff --git a/cuda/spgpu/kernels/hdia_cspmv.cu b/cuda/spgpu/kernels/hdia_cspmv.cu new file mode 100644 index 00000000..52360b40 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_cspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "hdia_spmv_base.cuh" diff --git a/cuda/spgpu/kernels/hdia_dspmv.cu b/cuda/spgpu/kernels/hdia_dspmv.cu new file mode 100644 index 00000000..5fdb08e9 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_dspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +//#define ENABLE_CACHE +#define VALUE_TYPE double +#define TYPE_SYMBOL D +//#define TEX_FETCH_TYPE int2 +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hdia_spmv_base.cuh b/cuda/spgpu/kernels/hdia_spmv_base.cuh new file mode 100644 index 00000000..2c8cfbaf --- /dev/null +++ b/cuda/spgpu/kernels/hdia_spmv_base.cuh @@ -0,0 +1,149 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_HDIA_NAME +#undef X_TEX +#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla) +#define GEN_SPGPU_HDIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla) +#include "hdia_spmv_base_template.cuh" +#if 0 +#undef GEN_SPGPU_HDIA_NAME +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch) +#define GEN_SPGPU_HDIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "hdia_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef GEN_SPGPU_HDIA_NAME +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch) +#define GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch) +#include "hdia_spmv_base_template.cuh" +#undef GEN_SPGPU_HDIA_NAME +#undef USE_PREFETCHING +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache) +#define GEN_SPGPU_HDIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache) +#include "hdia_spmv_base_template.cuh" +#endif +#undef GEN_SPGPU_HDIA_NAME +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv) +void +GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const VALUE_TYPE *x, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + // maxNForACall should be a multiple of hackSize + maxNForACall = (maxNForACall/hackSize)*hackSize; + + while (rows > maxNForACall) //managing large vectors + { + + CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + //if (avgDiags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + //else + //if (avgDiags < 20) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + //else + //CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + + y = y + maxNForACall; + z = z + maxNForACall; + hackOffsets += maxNForACall/hackSize; + + rows -= maxNForACall; + } + CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta); + //if (avgDiags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + //else + //if (avgDiags < 20) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta); + //else + //CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta); + + cudaCheckError("CUDA error on hdia_spmv"); +} + diff --git a/cuda/spgpu/kernels/hdia_spmv_base_template.cuh b/cuda/spgpu/kernels/hdia_spmv_base_template.cuh new file mode 100644 index 00000000..155179fd --- /dev/null +++ b/cuda/spgpu/kernels/hdia_spmv_base_template.cuh @@ -0,0 +1,253 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, + int rows, int cols, const VALUE_TYPE *x, VALUE_TYPE beta, int hackCount) +{ + int i = threadIdx.x + blockIdx.x * (blockDim.x); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows && CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + int hackId = i / hackSize; + int hackLaneId = i % hackSize; + + + // shared between offsetsChunks and warpHackOffsetTemp + extern __shared__ int dynShrMem[]; + + int hackOffset = 0; + int nextOffset = 0; + + unsigned int laneId = threadIdx.x % warpSize; + unsigned int warpId = threadIdx.x / warpSize; + +#if __CUDA_ARCH__ < 300 + int* warpHackOffset = dynShrMem; + + + if (laneId == 0 && i < rows) + { + warpHackOffset[warpId] = hackOffsets[hackId]; + warpHackOffset[warpId + (blockDim.x / warpSize)] = hackOffsets[hackId+1]; + } + + __syncthreads(); + hackOffset = warpHackOffset[warpId]; + nextOffset = warpHackOffset[warpId + blockDim.x / warpSize]; + __syncthreads(); +#elif __CUDA_ARCH__ < 700 + if (laneId == 0 && i < rows) + { + hackOffset = hackOffsets[hackId]; + nextOffset = hackOffsets[hackId+1]; + } + + hackOffset = __shfl(hackOffset, 0); + nextOffset = __shfl(nextOffset, 0); +#else + if (laneId == 0 && i < rows) + { + hackOffset = hackOffsets[hackId]; + nextOffset = hackOffsets[hackId+1]; + } + + hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0); + nextOffset = __shfl_sync(0xFFFFFFFF,nextOffset, 0); + +#endif + + if (hackId >= hackCount) + return; + + dM += hackOffset*hackSize + hackLaneId; + offsets += hackOffset; + + // diags for this hack is next hackOffset minus current hackOffset + int diags = nextOffset - hackOffset; + + + // Warp oriented + int rounds = (diags + warpSize - 1)/warpSize; + + volatile int *offsetsChunk = dynShrMem + warpId*warpSize; + + for (int r = 0; r < rounds; r++) + { + // in the last round diags will be <= warpSize + if (laneId < diags) + offsetsChunk[laneId] = offsets[laneId]; + + if (i < rows) + { + int count = min(diags, warpSize); + +#ifdef USE_PREFETCHING + int j; + for (j=0; j<=count-2; j += 2) + { + // prefetch 2 values + int column1 = offsetsChunk[j] + i; + int column2 = offsetsChunk[j+1] + i; + + VALUE_TYPE xValue1, xValue2; + VALUE_TYPE mValue1, mValue2; + + bool inside1 = column1 >= 0 && column1 < cols; + bool inside2 = column2 >= 0 && column2 < cols; + + if(inside1) + { + mValue1 = dM[0]; +#ifdef ENABLE_CACHE + xValue1 = fetchTex (column1); +#else + xValue1 = x[column1]; +#endif + } + + dM += hackSize; + + if(inside2) + { + mValue2 = dM[0]; +#ifdef ENABLE_CACHE + xValue2 = fetchTex (column2); +#else + xValue2 = x[column2]; +#endif + } + + dM += hackSize; + + if(inside1) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue1, xValue1, zProd); + if(inside2) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue2, xValue2, zProd); + } + + for (;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += hackSize; + } +#else + for (int j=0;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += hackSize; + } +#endif + + } + + diags -= warpSize; + offsets += warpSize; + } + + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (i >= rows) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[i] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[i] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const VALUE_TYPE *x, int hackCount) +{ + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, CONCAT(zero_,VALUE_TYPE)(), hackCount); +} + +__global__ void +CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const VALUE_TYPE *x, VALUE_TYPE beta, int hackCount) +{ + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta, hackCount); +} + +void +CONCAT(_,GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, + const VALUE_TYPE *x, VALUE_TYPE beta) +{ + dim3 block (THREAD_BLOCK); + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + + int hackCount = (rows + hackSize - 1)/hackSize; + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta, hackCount); + else + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, hackCount); + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/hdia_sspmv.cu b/cuda/spgpu/kernels/hdia_sspmv.cu new file mode 100644 index 00000000..7370f368 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hdia_zspmv.cu b/cuda/spgpu/kernels/hdia_zspmv.cu new file mode 100644 index 00000000..4f955ed9 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_cspmv.cu b/cuda/spgpu/kernels/hell_cspmv.cu new file mode 100644 index 00000000..86fbfc2b --- /dev/null +++ b/cuda/spgpu/kernels/hell_cspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "hell_spmv_base.cuh" diff --git a/cuda/spgpu/kernels/hell_dspmv.cu b/cuda/spgpu/kernels/hell_dspmv.cu new file mode 100644 index 00000000..b06f8cfb --- /dev/null +++ b/cuda/spgpu/kernels/hell_dspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#define TEX_FETCH_TYPE int2 +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_spmv_base.cuh b/cuda/spgpu/kernels/hell_spmv_base.cuh new file mode 100644 index 00000000..ca074d33 --- /dev/null +++ b/cuda/spgpu/kernels/hell_spmv_base.cuh @@ -0,0 +1,159 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_HELL_NAME +#undef X_TEX +#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif +#if __CUDA_ARCH__ < 300 +extern __shared__ int dynShrMem[]; +#endif + +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_vanilla) +#define GEN_SPGPU_HELL_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hellspmv_vanilla) +#include "hell_spmv_base_template.cuh" +#undef GEN_SPGPU_HELL_NAME +#if 0 +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_prefetch) +#define GEN_SPGPU_HELL_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hellspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "hell_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef GEN_SPGPU_HELL_NAME +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache_prefetch) +#define GEN_SPGPU_HELL_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache_prefetch) +#include "hell_spmv_base_template.cuh" +#undef GEN_SPGPU_HELL_NAME +#undef USE_PREFETCHING +#endif +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache) +#define GEN_SPGPU_HELL_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache) +#include "hell_spmv_base_template.cuh" +#undef GEN_SPGPU_HELL_NAME +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv) +void +GEN_SPGPU_HELL_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* cM, + const int* rP, + int hackSize, + const __device int* hackOffsets, + const __device int* rS, + const __device int* rIdx, + int avgNnzPerRow, + int rows, + const VALUE_TYPE *x, + VALUE_TYPE beta, + int baseIndex) +{ + + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + // maxNForACall should be a multiple of hackSize + maxNForACall = (maxNForACall/hackSize)*hackSize; + //fprintf(stderr,"Entering kernel %d maxNForACall\n",maxNForACall); + + while (rows > maxNForACall) //managing large vectors + { + + CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); + /* if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); */ + /* else if (avgNnzPerRow < 20) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); */ + /* else */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); */ + + y = y + maxNForACall; + z = z + maxNForACall; + hackOffsets = hackOffsets + maxNForACall/hackSize; + rS = rS + maxNForACall; + + rows -= maxNForACall; + } + //fprintf(stderr,"Calling kernel on %d rows\n",rows); + CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); + //fprintf(stderr,"Done kernel on %d rows\n",rows); + /* if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); */ + /* else if (avgNnzPerRow < 20) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); */ + /* else */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); */ + + cudaCheckError("CUDA error on hell_spmv"); +} + diff --git a/cuda/spgpu/kernels/hell_spmv_base_template.cuh b/cuda/spgpu/kernels/hell_spmv_base_template.cuh new file mode 100644 index 00000000..9ecd8f74 --- /dev/null +++ b/cuda/spgpu/kernels/hell_spmv_base_template.cuh @@ -0,0 +1,357 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#define IDX2 +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) { + int hackId = i / hackSize; + int hackLaneId = i % hackSize; + + int hackOffset; + unsigned int laneId = threadIdx.x % 32; +#if __CUDA_ARCH__ < 300 + // "volatile" used to avoid __syncthreads() + volatile int* warpHackOffset = dynShrMem; + + unsigned int warpId = threadIdx.x / 32; + + if (laneId == 0) + warpHackOffset[warpId] = hackOffsets[hackId]; + + hackOffset = warpHackOffset[warpId] + hackLaneId; +#elif __CUDA_ARCH__ < 700 + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl(hackOffset, 0) + hackLaneId; +#else + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0) + hackLaneId; +#endif + + rP += hackOffset; + cM += hackOffset; + + int rowSize = rS[i]; + int rowSizeM = rowSize / 2; + + if (threadIdx.y == 0) { + if (rowSize % 2) + ++rowSizeM; + } else { + rP += hackSize; + cM += hackSize; + } + + + for (int j = 0; j < rowSizeM; j++) { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += hackSize; + rP += hackSize; + + value = cM[0]; + cM += hackSize; + cM += hackSize; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + + __syncthreads(); + + if (threadIdx.y == 0) { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) { + + int hackId = i / hackSize; + int hackLaneId = i % hackSize; + + int hackOffset; + unsigned int laneId = threadIdx.x % 32; +#if __CUDA_ARCH__ < 300 + // "volatile" used to avoid __syncthreads() + volatile int* warpHackOffset = dynShrMem; + + unsigned int warpId = threadIdx.x / 32; + + if (laneId == 0) + warpHackOffset[warpId] = hackOffsets[hackId]; + + hackOffset = warpHackOffset[warpId] + hackLaneId; +#elif __CUDA_ARCH__ < 700 + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl(hackOffset, 0) + hackLaneId; +#else + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0) + hackLaneId; +#endif + + rP += hackOffset; + cM += hackOffset; + + int rowSize = rS[i]; + +#ifdef USE_PREFETCHING + for (int j = 0; j < rowSize / 2; j++) { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += hackSize; + pointers2 = rP[0] - baseIndex; + rP += hackSize; + + values1 = cM[0]; + cM += hackSize; + + values2 = cM[0]; + cM += hackSize; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (rowSize % 2) { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < rowSize; j++) { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += hackSize; + + value = cM[0]; + cM += hackSize; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + +} + +__global__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, const int* rIdx, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } +#if 1 + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#endif +} + + +__device__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _) + (VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + +#ifdef IDX2 + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#endif + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn) + (VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +} + +void +CONCAT(_,GEN_SPGPU_HELL_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, + const __device int* rIdx, int avgNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int avgThreshold; + + if (handle->capabilityMajor < 2) + avgThreshold = 8; + else if (handle->capabilityMajor < 3) + avgThreshold = 16; + else + avgThreshold = 32; +#ifdef IDX2 +#if defined(HELL_FORCE_THREADS_1) + dim3 block (THREAD_BLOCK, 1); +#elif defined(HELL_FORCE_THREADS_2) + dim3 block (THREAD_BLOCK, 2); +#else + dim3 block (THREAD_BLOCK, avgNnzPerRow >= avgThreshold ? 2 : 1); +#endif +#else + dim3 block (THREAD_BLOCK, 1); +#endif + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + + // Should we generalize the code to 1/2/4/8 threads per row? + // And maybe adjust THREAD_BLOCK size? + int shrMemSize; + shrMemSize=THREAD_BLOCK*sizeof(VALUE_TYPE); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + if (rIdx) { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx), cudaFuncCachePreferL1); + + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx) + <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, rows, x, beta, baseIndex); + } else { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0) + <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, baseIndex); + } + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} diff --git a/cuda/spgpu/kernels/hell_sspmv.cu b/cuda/spgpu/kernels/hell_sspmv.cu new file mode 100644 index 00000000..8d296b81 --- /dev/null +++ b/cuda/spgpu/kernels/hell_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_zspmv.cu b/cuda/spgpu/kernels/hell_zspmv.cu new file mode 100644 index 00000000..784ab719 --- /dev/null +++ b/cuda/spgpu/kernels/hell_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/igath.cu b/cuda/spgpu/kernels/igath.cu new file mode 100644 index 00000000..8c0fe1b6 --- /dev/null +++ b/cuda/spgpu/kernels/igath.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "gath_base.cuh" diff --git a/cuda/spgpu/kernels/iscat.cu b/cuda/spgpu/kernels/iscat.cu new file mode 100644 index 00000000..2001a740 --- /dev/null +++ b/cuda/spgpu/kernels/iscat.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "scat_base.cuh" diff --git a/cuda/spgpu/kernels/isetscal.cu b/cuda/spgpu/kernels/isetscal.cu new file mode 100644 index 00000000..d54eae95 --- /dev/null +++ b/cuda/spgpu/kernels/isetscal.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "setscal_base.cuh" diff --git a/cuda/spgpu/kernels/mathbase.cuh b/cuda/spgpu/kernels/mathbase.cuh new file mode 100644 index 00000000..e7d77778 --- /dev/null +++ b/cuda/spgpu/kernels/mathbase.cuh @@ -0,0 +1,53 @@ +#pragma once + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } +__device__ __host__ static bool float_isZero(float x) { return x == 0.0f; } +__device__ __host__ static bool int_isZero(int x) { return x == 0; } +__device__ __host__ static bool int_isNotZero(int x) { return x != 0; } + +__device__ static int int_fma(int a, int b, int c) { return ((a*b)+c); } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } +__device__ static float float_abs(float a) { return fabsf(a); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_abs(cuFloatComplex a) { return make_cuFloatComplex(cuCabsf(a),0); } + +//__device__ static float cuFloatComplex_abs(cuFloatComplex a) { return cuCabsf(a); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } +__device__ __host__ static bool double_isZero(double x) { return x == 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } +__device__ static double double_abs(double a) { return fabs (a); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_abs(cuDoubleComplex a) { return make_cuDoubleComplex(cuCabs(a),0); } +//__device__ static double cuDoubleComplex_abs(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif diff --git a/cuda/spgpu/kernels/sabs.cu b/cuda/spgpu/kernels/sabs.cu new file mode 100644 index 00000000..e335e773 --- /dev/null +++ b/cuda/spgpu/kernels/sabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define RES_VALUE_TYPE float +#define TYPE_SYMBOL S +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/samax.cu b/cuda/spgpu/kernels/samax.cu new file mode 100644 index 00000000..aaef17e2 --- /dev/null +++ b/cuda/spgpu/kernels/samax.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "amax_base.cuh" + diff --git a/cuda/spgpu/kernels/sasum.cu b/cuda/spgpu/kernels/sasum.cu new file mode 100644 index 00000000..33392203 --- /dev/null +++ b/cuda/spgpu/kernels/sasum.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "asum_base.cuh" + diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu new file mode 100644 index 00000000..2c46f19e --- /dev/null +++ b/cuda/spgpu/kernels/saxpby.cu @@ -0,0 +1,100 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0f) + z[id] = PREC_FMUL(alpha,x[id]); + else + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } +} + + +void spgpuSaxpby_(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuSaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuSaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on saxpby"); +} + +void spgpuSmaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x, + int count, int pitch) +{ + + for (int i=0; icurrentStream>>>(y, n, alpha, x); +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *y, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE *x) +{ + + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + CONCAT(_,GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL))(handle, y, maxNForACall, alpha, x); + x = x + maxNForACall; + y = y + maxNForACall; + n -= maxNForACall; + } + + CONCAT(_,GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL))(handle, y, n, alpha, x); + + cudaCheckError("CUDA error on scal"); +} diff --git a/cuda/spgpu/kernels/scat_base.cuh b/cuda/spgpu/kernels/scat_base.cuh new file mode 100644 index 00000000..f6f619b1 --- /dev/null +++ b/cuda/spgpu/kernels/scat_base.cuh @@ -0,0 +1,89 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),scat) + +#define BLOCK_SIZE 256 + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (VALUE_TYPE* vector, int count, const int* indices, const VALUE_TYPE* values, int firstIndex, VALUE_TYPE beta) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < count) + { + int pos = indices[id]-firstIndex; + if (pos < 0) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + vector[pos] = CONCAT(VALUE_TYPE, _fma)(beta, vector[pos], values[id]); + else + vector[pos] = values[id]; + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, VALUE_TYPE* y, int xNnz, const VALUE_TYPE *xValues, + const __device int *xIndices, int xBaseIndex, VALUE_TYPE beta) +{ + int msize = (xNnz+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(y, xNnz, xIndices, xValues, xBaseIndex, beta); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE* y, + int xNnz, + const __device VALUE_TYPE *xValues, + const __device int *xIndices, + int xBaseIndex, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (xNnz > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, maxNForACall, xValues, xIndices, xBaseIndex, beta); + xIndices += maxNForACall; + xValues += maxNForACall; + xNnz -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, xNnz, xValues, xIndices, xBaseIndex, beta); + cudaCheckError("CUDA error on scat"); +} + diff --git a/cuda/spgpu/kernels/sdot.cu b/cuda/spgpu/kernels/sdot.cu new file mode 100644 index 00000000..c19c7710 --- /dev/null +++ b/cuda/spgpu/kernels/sdot.cu @@ -0,0 +1,175 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +//#define USE_CUBLAS + +#ifdef USE_CUBLAS +#include "cublas.h" +#endif + +#define BLOCK_SIZE 320 +//#define BLOCK_SIZE 512 + +//#define ASSUME_LOCK_SYNC_PARALLELISM + +#ifndef USE_CUBLAS +static __device__ float sdotReductionResult[128]; +#endif + +__global__ void spgpuSdot_kern(int n, float* x, float* y) +{ + __shared__ float sSum[BLOCK_SIZE]; + + float res = 0; + + float* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + y += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + int numSteps = (lastX - x + blockOffset - 1)/blockOffset; + + // prefetching + for (int j = 0; j < numSteps / 2; j++) + { + float x1 = x[0]; x += blockOffset; + float y1 = y[0]; y += blockOffset; + float x2 = x[0]; x += blockOffset; + float y2 = y[0]; y += blockOffset; + + res = PREC_FADD(res, PREC_FMUL(x1,y1)); + res = PREC_FADD(res, PREC_FMUL(x2,y2)); + + } + + if (numSteps % 2) + { + res = PREC_FADD(res, PREC_FMUL(*x,*y)); + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + spgpuSdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, sdotReductionResult, blocks*sizeof(float)); + + for (int i=0; icurrentStream>>>(y, n, val); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + int first, + int last, + int baseIndex, + VALUE_TYPE val, + __device VALUE_TYPE* y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + int n=last-first+1; + y += (first-baseIndex); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, maxNForACall, val,y); + y += maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, n, val,y); + cudaCheckError("CUDA error on scat"); +} + diff --git a/cuda/spgpu/kernels/sgath.cu b/cuda/spgpu/kernels/sgath.cu new file mode 100644 index 00000000..d1980bc6 --- /dev/null +++ b/cuda/spgpu/kernels/sgath.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "gath_base.cuh" diff --git a/cuda/spgpu/kernels/snrm2.cu b/cuda/spgpu/kernels/snrm2.cu new file mode 100644 index 00000000..27b8a6e9 --- /dev/null +++ b/cuda/spgpu/kernels/snrm2.cu @@ -0,0 +1,166 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +//#define USE_CUBLAS + +#define BLOCK_SIZE 320 +//#define BLOCK_SIZE 512 + +//#define ASSUME_LOCK_SYNC_PARALLELISM + +static __device__ float snrm2ReductionResult[128]; + +__global__ void spgpuSnrm2_kern(int n, float* x) +{ + __shared__ float sSum[BLOCK_SIZE]; + + float res = 0; + + float* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + int numSteps = (lastX - x + blockOffset - 1)/blockOffset; + + // prefetching + for (int j = 0; j < numSteps / 2; j++) + { + float x1 = x[0]; x += blockOffset; + float x2 = x[0]; x += blockOffset; + + res = PREC_FADD(res, PREC_FMUL(x1,x1)); + res = PREC_FADD(res, PREC_FMUL(x2,x2)); + + } + + if (numSteps % 2) + { + float x1 = x[0]; + res = PREC_FADD(res, PREC_FMUL(x1,x1)); + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + + spgpuSnrm2_kern<<currentStream>>>(n, x); + cudaMemcpyFromSymbol(tRes, snrm2ReductionResult,blocks*sizeof(float)); + + for (int i=0; icurrentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuZaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuZaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on daxpby"); +} + +void spgpuZmaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + cuDoubleComplex tRes[128]; + + spgpuZdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, ddotReductionResult,blocks*sizeof(cuDoubleComplex)); + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + double tRes[128]; + + spgpuZnrm2_kern<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, dnrm2ReductionResult,blocks*sizeof(double)); + + for (int i=0; i aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(cone + (cone*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(cone,a,xv,czero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(cone,agpu,xv,czero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(cone,agpu,xg,czero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-cone,bg,+cone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + damatsize = amatsize + damatsize = damatsize/(1024*1024) + dgmatsize = agmatsize + dgmatsize = dgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,damatsize) + call psb_sum(ctxt,dgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(2*psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program c_file_spmv + + + + + diff --git a/test/gpukern/d_file_spmv.F90 b/test/gpukern/d_file_spmv.F90 new file mode 100644 index 00000000..2bbc0bc4 --- /dev/null +++ b/test/gpukern/d_file_spmv.F90 @@ -0,0 +1,496 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program d_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_dspmat_type) :: a, aux_a, agpu + + ! dense matrices + real(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) + real(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + real(psb_dpk_), pointer :: b_col_glob(:) + type(psb_d_vect_type) :: b_col, x_col, r_col + type(psb_d_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_d_vect_gpu) :: vmold +#endif + real(psb_dpk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: err, eps, damatsize, dgmatsize + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_d_coo_sparse_mat), target :: acoo + type(psb_d_csr_sparse_mat), target :: acsr + type(psb_d_ell_sparse_mat), target :: aell + type(psb_d_hll_sparse_mat), target :: ahll + type(psb_d_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_GPU + type(psb_d_elg_sparse_mat), target :: aelg + type(psb_d_csrg_sparse_mat), target :: acsrg + type(psb_d_hybg_sparse_mat), target :: ahybg + type(psb_d_hlg_sparse_mat), target :: ahlg + type(psb_d_hdiag_sparse_mat), target :: ahdiag +#endif + class(psb_d_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer(psb_lpk_) :: i,j,nrt, ns, nr, ig, nrg + integer(psb_ipk_) :: internal, m,ii,nnzero,info, ipart + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('HDIA') + acmold => ahdia + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case('HDIAG') + agmold => ahdiag + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(done + (done*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,a,xv,dzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-done,bg,+done,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + damatsize = amatsize + damatsize = damatsize/(1024*1024) + dgmatsize = agmatsize + dgmatsize = dgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,damatsize) + call psb_sum(ctxt,dgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program d_file_spmv + + + + + diff --git a/test/gpukern/data_input.f90 b/test/gpukern/data_input.f90 new file mode 100644 index 00000000..274cb7ad --- /dev/null +++ b/test/gpukern/data_input.f90 @@ -0,0 +1,221 @@ +! +! +! MLD2P4 version 2.0 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.0) +! +! (C) Copyright 2008,2009,2010 +! +! Salvatore Filippone +! Alfredo Buttari +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the MLD2P4 group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module data_input + + interface read_data + module procedure read_char, read_int,& + & read_double, read_single, read_logical,& + & string_read_char, string_read_int,& + & string_read_double, string_read_single, & + & string_read_logical + end interface read_data + interface trim_string + module procedure trim_string + end interface + + character(len=4096), private :: charbuf + character, private, parameter :: def_marker="!" + +contains + + subroutine read_logical(val,file,marker) + logical, intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_logical + + subroutine read_char(val,file,marker) + character(len=*), intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_char + + + subroutine read_int(val,file,marker) + integer, intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_int + subroutine read_single(val,file,marker) + use psb_base_mod + real(psb_spk_), intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_single + subroutine read_double(val,file,marker) + use psb_base_mod + real(psb_dpk_), intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_double + + subroutine string_read_char(val,file,marker) + character(len=*), intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),'(a)') val + end subroutine string_read_char + + subroutine string_read_int(val,file,marker) + integer, intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_int + + subroutine string_read_single(val,file,marker) + use psb_base_mod + real(psb_spk_), intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_single + + subroutine string_read_double(val,file,marker) + use psb_base_mod + real(psb_dpk_), intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_double + + subroutine string_read_logical(val,file,marker) + use psb_base_mod + logical, intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_logical + + function trim_string(string,marker) + character(len=*), intent(in) :: string + character(len=1), optional, intent(in) :: marker + character(len=len(string)) :: trim_string + character(len=1) :: marker_ + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + idx=index(string,marker_) + trim_string = adjustl(string(idx:)) + end function trim_string +end module data_input + diff --git a/test/gpukern/dpdegenmv.F90 b/test/gpukern/dpdegenmv.F90 new file mode 100644 index 00000000..ab616471 --- /dev/null +++ b/test/gpukern/dpdegenmv.F90 @@ -0,0 +1,997 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: dpdegenmv.f90 +! +! Program: pdegenmv +! This sample program measures the performance of the matrix-vector product. +! The matrix is generated in the same way as for the pdegen test case of +! the main PSBLAS library. +! +! +module psb_d_pde3d_mod + + + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_dspmat_type, psb_d_vect_type, dzero,& + & psb_d_base_sparse_mat, psb_d_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function d_func_3d(x,y,z) result(val) + import :: psb_dpk_ + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + end function d_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_d_gen_pde3d + end interface psb_gen_pde3d + +contains + + function d_null_func_3d(x,y,z) result(val) + + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + + val = dzero + + end function d_null_func_3d + ! + ! functions parametrizing the differential equation + ! + function b1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b1 + real(psb_dpk_), intent(in) :: x,y,z + b1=done/sqrt((3*done)) + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b2 + real(psb_dpk_), intent(in) :: x,y,z + b2=done/sqrt((3*done)) + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b3 + real(psb_dpk_), intent(in) :: x,y,z + b3=done/sqrt((3*done)) + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: c + real(psb_dpk_), intent(in) :: x,y,z + c=dzero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a1 + real(psb_dpk_), intent(in) :: x,y,z + a1=done/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a2 + real(psb_dpk_), intent(in) :: x,y,z + a2=done/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a3 + real(psb_dpk_), intent(in) :: x,y,z + a3=done/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: g + real(psb_dpk_), intent(in) :: x,y,z + g = dzero + if (x == done) then + g = done + else if (x == dzero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_dspmat_type) :: a + type(psb_d_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(d_func_3d), optional :: f + class(psb_d_base_sparse_mat), optional :: amold + class(psb_d_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_d_csc_sparse_mat) :: acsc + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat) :: acsr + real(psb_dpk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_dpk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_dpk_) :: deltah, sqdeltah, deltah2 + real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(d_func_3d), pointer :: f_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => d_null_func_3d + end if + + deltah = done/(idim+2) + sqdeltah = deltah*deltah + deltah2 = (2*done)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ctxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& + & dupl=psb_dupl_err_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ctxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) exit + zt(:)=dzero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold) + else + call psb_spasb(a,desc_a,info,afmt=afmt) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_d_gen_pde3d + + +end module psb_d_pde3d_mod + + +program pdgenmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif +#ifdef HAVE_RSB + use psb_rsb_mod +#endif + use psb_d_pde3d_mod + implicit none + + ! input parameters + character(len=5) :: acfmt, agfmt + integer :: idim + + ! miscellaneous + real(psb_dpk_), parameter :: one = 1.d0 + real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& + & tt1, tt2, gt1, gt2, gflops, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + + ! sparse matrix and preconditioner + type(psb_dspmat_type) :: a, agpu, aux_a + ! descriptor + type(psb_desc_type) :: desc_a + ! dense matrices + type(psb_d_vect_type), target :: xv, bv, xg, bg +#ifdef HAVE_GPU + type(psb_d_vect_gpu) :: vmold + type(psb_i_vect_gpu) :: imold +#endif + real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:) + ! blacs parameters + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + ! solver parameters + integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: err, eps + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + type(psb_d_coo_sparse_mat), target :: acoo + type(psb_d_csr_sparse_mat), target :: acsr + type(psb_d_ell_sparse_mat), target :: aell + type(psb_d_hll_sparse_mat), target :: ahll + type(psb_d_dia_sparse_mat), target :: adia + type(psb_d_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_RSB + type(psb_d_rsb_sparse_mat), target :: arsb +#endif +#ifdef HAVE_GPU + type(psb_d_elg_sparse_mat), target :: aelg + type(psb_d_csrg_sparse_mat), target :: acsrg +#if CUDA_SHORT_VERSION <= 10 + type(psb_d_hybg_sparse_mat), target :: ahybg +#endif + type(psb_d_hlg_sparse_mat), target :: ahlg + type(psb_d_hdiag_sparse_mat), target :: ahdiag + type(psb_d_dnsg_sparse_mat), target :: adnsg +#endif + class(psb_d_base_sparse_mat), pointer :: agmold, acmold + ! other variables + logical, parameter :: dump=.false. + integer(psb_ipk_) :: info, i, j, nr, nrg + integer(psb_lpk_) :: ig + character(len=20) :: name,ch_err + character(len=40) :: fname + + info=psb_success_ + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif +#ifdef HAVE_RSB + call psb_rsb_init() +#endif + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if(psb_get_errstatus() /= 0) goto 9999 + name='pdegenmv-gpu' + ! + ! Hello world + ! + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + ! + ! get parameters + ! + call get_parms(ctxt,acfmt,agfmt,idim) + + ! + ! allocate and fill in the coefficient matrix and initial vectors + ! + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='create_matrix' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 + if (iam == psb_root_) write(psb_out_unit,'(" ")') + + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'pde',idim,'-',iam,'-',np,'.mtx' + call a%print(fname,head='PDEGEN test matrix') + end if + + select case(psb_toupper(acfmt)) + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('DIA') + acmold => adia + case('HDIA') + acmold => ahdia + case('CSR') + acmold => acsr + case('COO') + acmold => acoo +#ifdef HAVE_RSB + case('RSB') + acmold => arsb +#endif + case default + write(*,*) 'Unknown format defaulting to HLL' + acmold => ahll + end select + call a%cscnv(info,mold=acmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('HDIAG') + agmold => ahdiag + case('CSRG') + agmold => acsrg + case('DNSG') + agmold => adnsg +#if CUDA_SHORT_VERSION <= 10 + case('HYBG') + agmold => ahybg +#endif + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select + call a%cscnv(agpu,info,mold=agmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + call desc_a%cnv(mold=imold) + + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_geasb(xg,desc_a,info,scratch=.true.,mold=vmold) +#endif + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x0,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + x0(i) = 1.0 + (1.0*ig)/nrg + end do + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + call psb_geall(x1,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + call psb_geasb(x1,desc_a,info) + call xv%bld(x0) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(x0,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + + call xv%set(x0) + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,a,xv,dzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + call xg%set(x0) + + ! FIXME: cache flush needed here + x1 = bv%get_vect() + x2 = bg%get_vect() + + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + x1 = bv%get_vect() + x2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + ! FIXME: cache flush needed here + call xg%set(x0) + call xg%sync() + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + ! For timing purposes we need to make sure all threads + ! in the device are done. + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + x1 = bv%get_vect() + x2 = bg%get_vect() + call psb_geaxpby(-done,bg,+done,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a)')'XCPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x1(1:nr),'Local part CPU',info,filename=fname) + write(fname,'(a,i3.3,a,i3.3,a)')'XGPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x2(1:nr),'Local part GPU',info,filename=fname) + end if + +#endif + annz = a%get_nzeros() + amatsize = a%sizeof() + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,nr) + call psb_sum(ctxt,annz) + call psb_sum(ctxt,amatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,& + & '("Matrix: ell1 ",i0)') idim + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nr + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation : ",i20," ")') amatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU.) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU.) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU.) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth + bdwdth = psb_gpu_MemoryPeakBandwidth() + write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + ! + ! cleanup storage and exit + ! + call psb_gefree(bv,desc_a,info) + call psb_gefree(xv,desc_a,info) + call psb_spfree(a,desc_a,info) + call psb_cdfree(desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='free routine' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if +#ifdef HAVE_GPU + call psb_gpu_exit() +#endif + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,acfmt,agfmt,idim) + type(psb_ctxt_type) :: ctxt + character(len=*) :: agfmt, acfmt + integer :: idim + integer :: np, iam + integer :: intbuf(10), ip + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + write(*,*) 'CPU side format?' + read(psb_inp_unit,*) acfmt + write(*,*) 'GPU side format?' + read(psb_inp_unit,*) agfmt + write(*,*) 'Size of discretization cube?' + read(psb_inp_unit,*) idim + endif + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,idim) + + if (iam == 0) then + write(psb_out_unit,'("Testing matrix : ell1")') + write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim + write(psb_out_unit,'("Number of processors : ",i0)')np + write(psb_out_unit,'("Data distribution : BLOCK")') + write(psb_out_unit,'(" ")') + end if + return + + end subroutine get_parms + +end program pdgenmv diff --git a/test/gpukern/s_file_spmv.F90 b/test/gpukern/s_file_spmv.F90 new file mode 100644 index 00000000..37a52717 --- /dev/null +++ b/test/gpukern/s_file_spmv.F90 @@ -0,0 +1,496 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program s_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_sspmat_type) :: a, aux_a, agpu + + ! dense matrices + real(psb_spk_), allocatable, target :: aux_b(:,:), d(:) + real(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + real(psb_spk_), pointer :: b_col_glob(:) + type(psb_s_vect_type) :: b_col, x_col, r_col + type(psb_s_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_s_vect_gpu) :: vmold +#endif + real(psb_spk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_spk_) :: err, eps, samatsize, sgmatsize + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_s_coo_sparse_mat), target :: acoo + type(psb_s_csr_sparse_mat), target :: acsr + type(psb_s_ell_sparse_mat), target :: aell + type(psb_s_hll_sparse_mat), target :: ahll + type(psb_s_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_GPU + type(psb_s_elg_sparse_mat), target :: aelg + type(psb_s_csrg_sparse_mat), target :: acsrg + type(psb_s_hybg_sparse_mat), target :: ahybg + type(psb_s_hlg_sparse_mat), target :: ahlg + type(psb_s_hdiag_sparse_mat), target :: ahdiag +#endif + class(psb_s_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg + integer :: internal, m,ii,nnzero + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('HDIA') + acmold => ahdia + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case('HDIAG') + agmold => ahdiag + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(sone + (sone*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,a,xv,szero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + samatsize = amatsize + samatsize = samatsize/(1024*1024) + sgmatsize = agmatsize + sgmatsize = sgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,samatsize) + call psb_sum(ctxt,sgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') samatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') sgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+& + & annz*(psb_sizeof_sp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program s_file_spmv + + + + + diff --git a/test/gpukern/spdegenmv.F90 b/test/gpukern/spdegenmv.F90 new file mode 100644 index 00000000..1c7d646f --- /dev/null +++ b/test/gpukern/spdegenmv.F90 @@ -0,0 +1,989 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: spdegenmv.f90 +! +! Program: pdegenmv +! This sample program measures the performance of the matrix-vector product. +! The matrix is generated in the same way as for the pdegen test case of +! the main PSBLAS library. +! +! +module psb_s_pde3d_mod + + + use psb_base_mod, only : psb_spk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_sspmat_type, psb_s_vect_type, szero,& + & psb_s_base_sparse_mat, psb_s_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function s_func_3d(x,y,z) result(val) + import :: psb_spk_ + real(psb_spk_), intent(in) :: x,y,z + real(psb_spk_) :: val + end function s_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_s_gen_pde3d + end interface psb_gen_pde3d + +contains + + function s_null_func_3d(x,y,z) result(val) + + real(psb_spk_), intent(in) :: x,y,z + real(psb_spk_) :: val + + val = szero + + end function s_null_func_3d + ! + ! functions parametrizing the differential equation + ! + function b1(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: b1 + real(psb_spk_), intent(in) :: x,y,z + b1=sone/sqrt((3*sone)) + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: b2 + real(psb_spk_), intent(in) :: x,y,z + b2=sone/sqrt((3*sone)) + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: b3 + real(psb_spk_), intent(in) :: x,y,z + b3=sone/sqrt((3*sone)) + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: c + real(psb_spk_), intent(in) :: x,y,z + c=szero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: a1 + real(psb_spk_), intent(in) :: x,y,z + a1=sone/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: a2 + real(psb_spk_), intent(in) :: x,y,z + a2=sone/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: a3 + real(psb_spk_), intent(in) :: x,y,z + a3=sone/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: g + real(psb_spk_), intent(in) :: x,y,z + g = szero + if (x == sone) then + g = sone + else if (x == szero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_s_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_sspmat_type) :: a + type(psb_s_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(s_func_3d), optional :: f + class(psb_s_base_sparse_mat), optional :: amold + class(psb_s_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_s_csc_sparse_mat) :: acsc + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat) :: acsr + real(psb_spk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_spk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_spk_) :: deltah, sqdeltah, deltah2 + real(psb_spk_), parameter :: rhs=szero,one=sone,zero=szero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(s_func_3d), pointer :: f_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => s_null_func_3d + end if + + deltah = sone/(idim+2) + sqdeltah = deltah*deltah + deltah2 = (2*sone)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ctxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& + & dupl=psb_dupl_err_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ctxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,sone)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,sone,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(sone,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) exit + zt(:)=szero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold) + else + call psb_spasb(a,desc_a,info,afmt=afmt) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_s_gen_pde3d + + +end module psb_s_pde3d_mod + + +program pdgenmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use psb_s_pde3d_mod + implicit none + + ! input parameters + character(len=5) :: acfmt, agfmt + integer :: idim + + ! miscellaneous + real(psb_spk_), parameter :: one = 1.e0 + real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& + & tt1, tt2, gt1, gt2, gflops, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + + ! sparse matrix and preconditioner + type(psb_sspmat_type) :: a, agpu, aux_a + ! descriptor + type(psb_desc_type) :: desc_a + ! dense matrices + type(psb_s_vect_type), target :: xv,bv, xg, bg +#ifdef HAVE_GPU + type(psb_s_vect_gpu) :: vmold + type(psb_i_vect_gpu) :: imold +#endif + real(psb_spk_), allocatable :: x1(:), x2(:), x0(:) + ! blacs parameters + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + ! solver parameters + integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes + real(psb_spk_) :: err, eps + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + type(psb_s_coo_sparse_mat), target :: acoo + type(psb_s_csr_sparse_mat), target :: acsr + type(psb_s_ell_sparse_mat), target :: aell + type(psb_s_hll_sparse_mat), target :: ahll + type(psb_s_dia_sparse_mat), target :: adia + type(psb_s_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_GPU + type(psb_s_elg_sparse_mat), target :: aelg + type(psb_s_csrg_sparse_mat), target :: acsrg +#if CUDA_SHORT_VERSION <= 10 + type(psb_s_hybg_sparse_mat), target :: ahybg +#endif + type(psb_s_hlg_sparse_mat), target :: ahlg + type(psb_s_dnsg_sparse_mat), target :: adnsg + type(psb_s_hdiag_sparse_mat), target :: ahdiag +#endif + class(psb_s_base_sparse_mat), pointer :: agmold, acmold + ! other variables + logical, parameter :: dump=.false. + integer(psb_ipk_) :: info, i, j, nr, nrg + integer(psb_lpk_) :: ig + character(len=20) :: name,ch_err + character(len=40) :: fname + + info=psb_success_ + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if(psb_get_errstatus() /= 0) goto 9999 + name='pdegenmv-gpu' + ! + ! Hello world + ! + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + ! + ! get parameters + ! + call get_parms(ctxt,acfmt,agfmt,idim) + + ! + ! allocate and fill in the coefficient matrix and initial vectors + ! + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='create_matrix' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 + if (iam == psb_root_) write(psb_out_unit,'(" ")') + + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'pde',idim,'-',iam,'-',np,'.mtx' + call a%print(fname,head='PDEGEN test matrix') + end if + + select case(psb_toupper(acfmt)) + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('DIA') + acmold => adia + case('HDIA') + acmold => ahdia + case('CSR') + acmold => acsr + case('COO') + acmold => acoo +#ifdef HAVE_RSB + case('RSB') + acmold => arsb +#endif + case default + write(*,*) 'Unknown format defaulting to HLL' + acmold => ahll + end select + call a%cscnv(info,mold=acmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('HDIAG') + agmold => ahdiag + case('CSRG') + agmold => acsrg + case('DNSG') + agmold => adnsg +#if CUDA_SHORT_VERSION <= 10 + case('HYBG') + agmold => ahybg +#endif + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select + call a%cscnv(agpu,info,mold=agmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + call desc_a%cnv(mold=imold) + + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_geasb(xg,desc_a,info,scratch=.true.,mold=vmold) +#endif + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x0,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + x0(i) = 1.0 + (1.0*ig)/nrg + end do + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + call psb_geall(x1,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + call psb_geasb(x1,desc_a,info) + call xv%bld(x0) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(x0,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + + call xv%set(x0) + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,a,xv,szero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + call xg%set(x0) + + ! FIXME: cache flush needed here + x1 = bv%get_vect() + x2 = bg%get_vect() + + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + x1 = bv%get_vect() + x2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + + ! FIXME: cache flush needed here + call xg%set(x0) + call xg%sync() + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info) + ! For timing purposes we need to make sure all threads + ! in the device are done. + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + x1 = bv%get_vect() + x2 = bg%get_vect() + call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a)')'XCPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x1(1:nr),'Local part CPU',info,filename=fname) + write(fname,'(a,i3.3,a,i3.3,a)')'XGPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x2(1:nr),'Local part GPU',info,filename=fname) + end if +#endif + annz = a%get_nzeros() + amatsize = a%sizeof() + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,nr) + call psb_sum(ctxt,annz) + call psb_sum(ctxt,amatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,& + & '("Matrix: ell1 ",i0)') idim + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nr + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation : ",i20," ")') amatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU.) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU.) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU.) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+& + & annz*(psb_sizeof_sp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth + bdwdth = psb_gpu_MemoryPeakBandwidth() + write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + ! + ! cleanup storage and exit + ! + call psb_gefree(bv,desc_a,info) + call psb_gefree(xv,desc_a,info) + call psb_spfree(a,desc_a,info) + call psb_cdfree(desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='free routine' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if +#ifdef HAVE_GPU + call psb_gpu_exit() +#endif + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,acfmt,agfmt,idim) + type(psb_ctxt_type) :: ctxt + character(len=*) :: agfmt, acfmt + integer :: idim + integer :: np, iam + integer :: intbuf(10), ip + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + write(*,*) 'CPU side format?' + read(psb_inp_unit,*) acfmt + write(*,*) 'GPU side format?' + read(psb_inp_unit,*) agfmt + write(*,*) 'Size of discretization cube?' + read(psb_inp_unit,*) idim + endif + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,idim) + + if (iam == 0) then + write(psb_out_unit,'("Testing matrix : ell1")') + write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim + write(psb_out_unit,'("Number of processors : ",i0)')np + write(psb_out_unit,'("Data distribution : BLOCK")') + write(psb_out_unit,'(" ")') + end if + return + + end subroutine get_parms + + +end program pdgenmv diff --git a/test/gpukern/z_file_spmv.F90 b/test/gpukern/z_file_spmv.F90 new file mode 100644 index 00000000..153dd5e1 --- /dev/null +++ b/test/gpukern/z_file_spmv.F90 @@ -0,0 +1,491 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program z_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_zspmat_type) :: a, aux_a, agpu + + ! dense matrices + complex(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) + complex(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + complex(psb_dpk_), pointer :: b_col_glob(:) + type(psb_z_vect_type) :: b_col, x_col, r_col + type(psb_z_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_z_vect_gpu) :: vmold +#endif + complex(psb_dpk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: damatsize, dgmatsize + complex(psb_dpk_) :: err, eps + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_z_coo_sparse_mat), target :: acoo + type(psb_z_csr_sparse_mat), target :: acsr + type(psb_z_ell_sparse_mat), target :: aell + type(psb_z_hll_sparse_mat), target :: ahll +#ifdef HAVE_GPU + type(psb_z_elg_sparse_mat), target :: aelg + type(psb_z_csrg_sparse_mat), target :: acsrg + type(psb_z_hybg_sparse_mat), target :: ahybg + type(psb_z_hlg_sparse_mat), target :: ahlg +#endif + class(psb_z_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg + integer :: internal, m,ii,nnzero + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(zone + (zone*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(zone,a,xv,zzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(zone,agpu,xv,zzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(zone,agpu,xg,zzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-zone,bg,+zone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + damatsize = amatsize + damatsize = damatsize/(1024*1024) + dgmatsize = agmatsize + dgmatsize = dgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,damatsize) + call psb_sum(ctxt,dgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(2*psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program z_file_spmv + + + + + From a2788bdf0ba1a4b6e4bf3bb447b49da76d4db931 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 7 Nov 2023 13:39:44 +0100 Subject: [PATCH 010/110] New version with ND product --- base/psblas/psb_cspmm.f90 | 23 ++++++++++++++----- base/psblas/psb_dspmm.f90 | 44 ++++++++++++++++++++++++++----------- base/psblas/psb_sspmm.f90 | 23 ++++++++++++++----- base/psblas/psb_zspmm.f90 | 23 ++++++++++++++----- base/tools/psb_cspasb.f90 | 2 +- base/tools/psb_sspasb.f90 | 2 +- base/tools/psb_zspasb.f90 | 2 +- test/pargen/psb_d_pde3d.F90 | 4 ++-- test/pargen/runs/ppde.inp | 2 +- 9 files changed, 88 insertions(+), 37 deletions(-) diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index 84d8a7d8..25a6bc56 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -180,12 +180,23 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,cone,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,cone,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else if (doswap_) then diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index d5897f82..7888188a 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -180,22 +180,40 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,done,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,done,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) t2= psb_wtime() + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) t3= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2 + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - - end if if(info /= psb_success_) then info = psb_err_from_subroutine_non_ diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 7c1e0ab3..cf8919f0 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -180,12 +180,23 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,sone,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,sone,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else if (doswap_) then diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index 4dc73f83..629fcf2b 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -180,12 +180,23 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,zone,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,zone,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else if (doswap_) then diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 46258139..8263e309 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -183,7 +183,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) type(psb_c_coo_sparse_mat) :: acoo type(psb_c_csr_sparse_mat), allocatable :: aclip type(psb_c_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.false. + logical, parameter :: use_ecsr=.true. allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index 0edae30e..f273c7f4 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -183,7 +183,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) type(psb_s_coo_sparse_mat) :: acoo type(psb_s_csr_sparse_mat), allocatable :: aclip type(psb_s_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.false. + logical, parameter :: use_ecsr=.true. allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index cd77de15..1a381303 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -183,7 +183,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) type(psb_z_coo_sparse_mat) :: acoo type(psb_z_csr_sparse_mat), allocatable :: aclip type(psb_z_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.false. + logical, parameter :: use_ecsr=.true. allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index cd503d29..e802736e 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -680,9 +680,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold,bld_and=.true.) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=.false.) else - call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.true.) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.false.) end if end if call psb_barrier(ctxt) diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index c70a973f..44dac085 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -5,7 +5,7 @@ CSR Storage format for matrix A: CSR COO 200 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 -0300 MAXIT +0008 MAXIT 10 ITRACE 002 IRST restart for RGMRES and BiCGSTABL ILU Block Solver ILU,ILUT,INVK,AINVT,AORTH From a6ec655a97d14dbfdf3003e3aecfc32955b45080 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 7 Nov 2023 17:57:32 +0100 Subject: [PATCH 011/110] Prepare merge --- .../impl/{psb_c_csc_impl.f90 => psb_c_csc_impl.F90} | 0 .../impl/{psb_c_csr_impl.f90 => psb_c_csr_impl.F90} | 0 .../impl/{psb_d_csc_impl.f90 => psb_d_csc_impl.F90} | 0 .../impl/{psb_d_csr_impl.f90 => psb_d_csr_impl.F90} | 0 .../impl/{psb_s_csc_impl.f90 => psb_s_csc_impl.F90} | 0 .../impl/{psb_s_csr_impl.f90 => psb_s_csr_impl.F90} | 0 .../impl/{psb_z_csc_impl.f90 => psb_z_csc_impl.F90} | 0 .../impl/{psb_z_csr_impl.f90 => psb_z_csr_impl.F90} | 0 test/hello/Makefile | 10 ++++++++-- 9 files changed, 8 insertions(+), 2 deletions(-) rename base/serial/impl/{psb_c_csc_impl.f90 => psb_c_csc_impl.F90} (100%) rename base/serial/impl/{psb_c_csr_impl.f90 => psb_c_csr_impl.F90} (100%) rename base/serial/impl/{psb_d_csc_impl.f90 => psb_d_csc_impl.F90} (100%) rename base/serial/impl/{psb_d_csr_impl.f90 => psb_d_csr_impl.F90} (100%) rename base/serial/impl/{psb_s_csc_impl.f90 => psb_s_csc_impl.F90} (100%) rename base/serial/impl/{psb_s_csr_impl.f90 => psb_s_csr_impl.F90} (100%) rename base/serial/impl/{psb_z_csc_impl.f90 => psb_z_csc_impl.F90} (100%) rename base/serial/impl/{psb_z_csr_impl.f90 => psb_z_csr_impl.F90} (100%) diff --git a/base/serial/impl/psb_c_csc_impl.f90 b/base/serial/impl/psb_c_csc_impl.F90 similarity index 100% rename from base/serial/impl/psb_c_csc_impl.f90 rename to base/serial/impl/psb_c_csc_impl.F90 diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.F90 similarity index 100% rename from base/serial/impl/psb_c_csr_impl.f90 rename to base/serial/impl/psb_c_csr_impl.F90 diff --git a/base/serial/impl/psb_d_csc_impl.f90 b/base/serial/impl/psb_d_csc_impl.F90 similarity index 100% rename from base/serial/impl/psb_d_csc_impl.f90 rename to base/serial/impl/psb_d_csc_impl.F90 diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.F90 similarity index 100% rename from base/serial/impl/psb_d_csr_impl.f90 rename to base/serial/impl/psb_d_csr_impl.F90 diff --git a/base/serial/impl/psb_s_csc_impl.f90 b/base/serial/impl/psb_s_csc_impl.F90 similarity index 100% rename from base/serial/impl/psb_s_csc_impl.f90 rename to base/serial/impl/psb_s_csc_impl.F90 diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.F90 similarity index 100% rename from base/serial/impl/psb_s_csr_impl.f90 rename to base/serial/impl/psb_s_csr_impl.F90 diff --git a/base/serial/impl/psb_z_csc_impl.f90 b/base/serial/impl/psb_z_csc_impl.F90 similarity index 100% rename from base/serial/impl/psb_z_csc_impl.f90 rename to base/serial/impl/psb_z_csc_impl.F90 diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.F90 similarity index 100% rename from base/serial/impl/psb_z_csr_impl.f90 rename to base/serial/impl/psb_z_csr_impl.F90 diff --git a/test/hello/Makefile b/test/hello/Makefile index a6811ea7..f16ff75e 100644 --- a/test/hello/Makefile +++ b/test/hello/Makefile @@ -16,7 +16,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). EXEDIR=./runs -all: runsd hello pingpong +all: runsd hello pingpong tsum tsum1 runsd: (if test ! -d runs ; then mkdir runs; fi) @@ -28,11 +28,17 @@ hello: hello.o pingpong: pingpong.o $(FLINK) pingpong.o -o pingpong $(PSBLAS_LIB) $(LDLIBS) /bin/mv pingpong $(EXEDIR) +tsum: tsum.o + $(FLINK) tsum.o -o tsum $(PSBLAS_LIB) $(LDLIBS) + /bin/mv tsum $(EXEDIR) +tsum1: tsum1.o + $(FLINK) tsum1.o -o tsum1 $(PSBLAS_LIB) $(LDLIBS) + /bin/mv tsum1 $(EXEDIR) clean: - /bin/rm -f hello.o pingpong.o + /bin/rm -f hello.o pingpong.o tsum.o tsum1.o $(EXEDIR)/hello verycleanlib: (cd ../..; make veryclean) From 6fa0bf7fe7f5f9746bbbaf6a6d335ebe88aa97eb Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 28 Nov 2023 19:14:28 +0100 Subject: [PATCH 012/110] Complete cuda renaming --- cuda/Makefile | 108 +-- cuda/impl/Makefile | 540 ++++++------ ...oo.F90 => psb_c_cuda_cp_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_c_cuda_cp_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_c_cuda_cp_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_c_cuda_cp_elg_from_coo.F90} | 14 +- ...fmt.F90 => psb_c_cuda_cp_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_c_cuda_cp_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_c_cuda_cp_hlg_from_coo.F90} | 14 +- ...fmt.F90 => psb_c_cuda_cp_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_c_cuda_cp_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_c_cuda_cp_hybg_from_fmt.F90} | 10 +- ....F90 => psb_c_cuda_csrg_allocate_mnnz.F90} | 10 +- ...csrg_csmm.F90 => psb_c_cuda_csrg_csmm.F90} | 10 +- ...csrg_csmv.F90 => psb_c_cuda_csrg_csmv.F90} | 12 +- ...m_gpu.F90 => psb_c_cuda_csrg_from_gpu.F90} | 10 +- ....F90 => psb_c_cuda_csrg_inner_vect_sv.F90} | 18 +- ...csrg_mold.F90 => psb_c_cuda_csrg_mold.F90} | 10 +- ....F90 => psb_c_cuda_csrg_reallocate_nz.F90} | 12 +- ...csrg_scal.F90 => psb_c_cuda_csrg_scal.F90} | 10 +- ...rg_scals.F90 => psb_c_cuda_csrg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_c_cuda_csrg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_c_cuda_csrg_vect_mv.F90} | 18 +- ...diag_csmv.F90 => psb_c_cuda_diag_csmv.F90} | 12 +- ...diag_mold.F90 => psb_c_cuda_diag_mold.F90} | 10 +- ..._to_gpu.F90 => psb_c_cuda_diag_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_c_cuda_diag_vect_mv.F90} | 18 +- ..._impl.F90 => psb_c_cuda_dnsg_mat_impl.F90} | 126 +-- ...z.F90 => psb_c_cuda_elg_allocate_mnnz.F90} | 10 +- ...b_d_elg_asb.f90 => psb_c_cuda_elg_asb.f90} | 8 +- ...c_elg_csmm.F90 => psb_c_cuda_elg_csmm.F90} | 12 +- ...c_elg_csmv.F90 => psb_c_cuda_elg_csmv.F90} | 10 +- ...elg_csput.F90 => psb_c_cuda_elg_csput.F90} | 32 +- ...om_gpu.F90 => psb_c_cuda_elg_from_gpu.F90} | 10 +- ...v.F90 => psb_c_cuda_elg_inner_vect_sv.F90} | 14 +- ...c_elg_mold.F90 => psb_c_cuda_elg_mold.F90} | 10 +- ...z.F90 => psb_c_cuda_elg_reallocate_nz.F90} | 12 +- ...c_elg_scal.F90 => psb_c_cuda_elg_scal.F90} | 10 +- ...elg_scals.F90 => psb_c_cuda_elg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_c_cuda_elg_to_gpu.F90} | 10 +- ...d_elg_trim.f90 => psb_c_cuda_elg_trim.f90} | 8 +- ...vect_mv.F90 => psb_c_cuda_elg_vect_mv.F90} | 18 +- ...iag_csmv.F90 => psb_c_cuda_hdiag_csmv.F90} | 12 +- ...iag_mold.F90 => psb_c_cuda_hdiag_mold.F90} | 10 +- ...to_gpu.F90 => psb_c_cuda_hdiag_to_gpu.F90} | 10 +- ...ct_mv.F90 => psb_c_cuda_hdiag_vect_mv.F90} | 18 +- ...z.F90 => psb_c_cuda_hlg_allocate_mnnz.F90} | 10 +- ...c_hlg_csmm.F90 => psb_c_cuda_hlg_csmm.F90} | 12 +- ...c_hlg_csmv.F90 => psb_c_cuda_hlg_csmv.F90} | 12 +- ...om_gpu.F90 => psb_c_cuda_hlg_from_gpu.F90} | 10 +- ...v.F90 => psb_c_cuda_hlg_inner_vect_sv.F90} | 12 +- ...c_hlg_mold.F90 => psb_c_cuda_hlg_mold.F90} | 10 +- ...z.F90 => psb_c_cuda_hlg_reallocate_nz.F90} | 12 +- ...c_hlg_scal.F90 => psb_c_cuda_hlg_scal.F90} | 10 +- ...hlg_scals.F90 => psb_c_cuda_hlg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_c_cuda_hlg_to_gpu.F90} | 10 +- ...vect_mv.F90 => psb_c_cuda_hlg_vect_mv.F90} | 18 +- ....F90 => psb_c_cuda_hybg_allocate_mnnz.F90} | 10 +- ...hybg_csmm.F90 => psb_c_cuda_hybg_csmm.F90} | 12 +- ...hybg_csmv.F90 => psb_c_cuda_hybg_csmv.F90} | 12 +- ....F90 => psb_c_cuda_hybg_inner_vect_sv.F90} | 18 +- ...hybg_mold.F90 => psb_c_cuda_hybg_mold.F90} | 10 +- ....F90 => psb_c_cuda_hybg_reallocate_nz.F90} | 12 +- ...hybg_scal.F90 => psb_c_cuda_hybg_scal.F90} | 10 +- ...bg_scals.F90 => psb_c_cuda_hybg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_c_cuda_hybg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_c_cuda_hybg_vect_mv.F90} | 18 +- ...oo.F90 => psb_c_cuda_mv_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_c_cuda_mv_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_c_cuda_mv_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_c_cuda_mv_elg_from_coo.F90} | 10 +- ...fmt.F90 => psb_c_cuda_mv_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_c_cuda_mv_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_c_cuda_mv_hlg_from_coo.F90} | 12 +- ...fmt.F90 => psb_c_cuda_mv_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_c_cuda_mv_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_c_cuda_mv_hybg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_cp_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_d_cuda_cp_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_cp_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_d_cuda_cp_elg_from_coo.F90} | 14 +- ...fmt.F90 => psb_d_cuda_cp_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_d_cuda_cp_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_d_cuda_cp_hlg_from_coo.F90} | 14 +- ...fmt.F90 => psb_d_cuda_cp_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_cp_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_d_cuda_cp_hybg_from_fmt.F90} | 10 +- ....F90 => psb_d_cuda_csrg_allocate_mnnz.F90} | 10 +- ...csrg_csmm.F90 => psb_d_cuda_csrg_csmm.F90} | 10 +- ...csrg_csmv.F90 => psb_d_cuda_csrg_csmv.F90} | 12 +- ...m_gpu.F90 => psb_d_cuda_csrg_from_gpu.F90} | 10 +- ....F90 => psb_d_cuda_csrg_inner_vect_sv.F90} | 18 +- ...csrg_mold.F90 => psb_d_cuda_csrg_mold.F90} | 10 +- ....F90 => psb_d_cuda_csrg_reallocate_nz.F90} | 12 +- ...csrg_scal.F90 => psb_d_cuda_csrg_scal.F90} | 10 +- ...rg_scals.F90 => psb_d_cuda_csrg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_d_cuda_csrg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_d_cuda_csrg_vect_mv.F90} | 18 +- ...diag_csmv.F90 => psb_d_cuda_diag_csmv.F90} | 12 +- ...diag_mold.F90 => psb_d_cuda_diag_mold.F90} | 10 +- ..._to_gpu.F90 => psb_d_cuda_diag_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_d_cuda_diag_vect_mv.F90} | 18 +- ..._impl.F90 => psb_d_cuda_dnsg_mat_impl.F90} | 126 +-- ...z.F90 => psb_d_cuda_elg_allocate_mnnz.F90} | 10 +- ...b_s_elg_asb.f90 => psb_d_cuda_elg_asb.f90} | 8 +- ...d_elg_csmm.F90 => psb_d_cuda_elg_csmm.F90} | 12 +- ...d_elg_csmv.F90 => psb_d_cuda_elg_csmv.F90} | 10 +- ...elg_csput.F90 => psb_d_cuda_elg_csput.F90} | 32 +- ...om_gpu.F90 => psb_d_cuda_elg_from_gpu.F90} | 10 +- ...v.F90 => psb_d_cuda_elg_inner_vect_sv.F90} | 14 +- ...d_elg_mold.F90 => psb_d_cuda_elg_mold.F90} | 10 +- ...z.F90 => psb_d_cuda_elg_reallocate_nz.F90} | 12 +- ...d_elg_scal.F90 => psb_d_cuda_elg_scal.F90} | 10 +- ...elg_scals.F90 => psb_d_cuda_elg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_d_cuda_elg_to_gpu.F90} | 10 +- ...s_elg_trim.f90 => psb_d_cuda_elg_trim.f90} | 8 +- ...vect_mv.F90 => psb_d_cuda_elg_vect_mv.F90} | 18 +- ...iag_csmv.F90 => psb_d_cuda_hdiag_csmv.F90} | 12 +- ...iag_mold.F90 => psb_d_cuda_hdiag_mold.F90} | 10 +- ...to_gpu.F90 => psb_d_cuda_hdiag_to_gpu.F90} | 10 +- ...ct_mv.F90 => psb_d_cuda_hdiag_vect_mv.F90} | 18 +- ...z.F90 => psb_d_cuda_hlg_allocate_mnnz.F90} | 10 +- ...d_hlg_csmm.F90 => psb_d_cuda_hlg_csmm.F90} | 12 +- ...d_hlg_csmv.F90 => psb_d_cuda_hlg_csmv.F90} | 12 +- ...om_gpu.F90 => psb_d_cuda_hlg_from_gpu.F90} | 10 +- ...v.F90 => psb_d_cuda_hlg_inner_vect_sv.F90} | 12 +- ...d_hlg_mold.F90 => psb_d_cuda_hlg_mold.F90} | 10 +- ...z.F90 => psb_d_cuda_hlg_reallocate_nz.F90} | 12 +- ...d_hlg_scal.F90 => psb_d_cuda_hlg_scal.F90} | 10 +- ...hlg_scals.F90 => psb_d_cuda_hlg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_d_cuda_hlg_to_gpu.F90} | 10 +- ...vect_mv.F90 => psb_d_cuda_hlg_vect_mv.F90} | 18 +- ....F90 => psb_d_cuda_hybg_allocate_mnnz.F90} | 10 +- ...hybg_csmm.F90 => psb_d_cuda_hybg_csmm.F90} | 12 +- ...hybg_csmv.F90 => psb_d_cuda_hybg_csmv.F90} | 12 +- ....F90 => psb_d_cuda_hybg_inner_vect_sv.F90} | 18 +- ...hybg_mold.F90 => psb_d_cuda_hybg_mold.F90} | 10 +- ....F90 => psb_d_cuda_hybg_reallocate_nz.F90} | 12 +- ...hybg_scal.F90 => psb_d_cuda_hybg_scal.F90} | 10 +- ...bg_scals.F90 => psb_d_cuda_hybg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_d_cuda_hybg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_d_cuda_hybg_vect_mv.F90} | 18 +- ...oo.F90 => psb_d_cuda_mv_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_d_cuda_mv_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_mv_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_d_cuda_mv_elg_from_coo.F90} | 10 +- ...fmt.F90 => psb_d_cuda_mv_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_d_cuda_mv_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_d_cuda_mv_hlg_from_coo.F90} | 12 +- ...fmt.F90 => psb_d_cuda_mv_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_mv_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_d_cuda_mv_hybg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_cp_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_s_cuda_cp_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_cp_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_s_cuda_cp_elg_from_coo.F90} | 14 +- ...fmt.F90 => psb_s_cuda_cp_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_s_cuda_cp_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_s_cuda_cp_hlg_from_coo.F90} | 14 +- ...fmt.F90 => psb_s_cuda_cp_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_cp_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_s_cuda_cp_hybg_from_fmt.F90} | 10 +- ....F90 => psb_s_cuda_csrg_allocate_mnnz.F90} | 10 +- ...csrg_csmm.F90 => psb_s_cuda_csrg_csmm.F90} | 10 +- ...csrg_csmv.F90 => psb_s_cuda_csrg_csmv.F90} | 12 +- ...m_gpu.F90 => psb_s_cuda_csrg_from_gpu.F90} | 10 +- ....F90 => psb_s_cuda_csrg_inner_vect_sv.F90} | 18 +- ...csrg_mold.F90 => psb_s_cuda_csrg_mold.F90} | 10 +- ....F90 => psb_s_cuda_csrg_reallocate_nz.F90} | 12 +- ...csrg_scal.F90 => psb_s_cuda_csrg_scal.F90} | 10 +- ...rg_scals.F90 => psb_s_cuda_csrg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_s_cuda_csrg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_s_cuda_csrg_vect_mv.F90} | 18 +- ...diag_csmv.F90 => psb_s_cuda_diag_csmv.F90} | 12 +- ...diag_mold.F90 => psb_s_cuda_diag_mold.F90} | 10 +- ..._to_gpu.F90 => psb_s_cuda_diag_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_s_cuda_diag_vect_mv.F90} | 18 +- ..._impl.F90 => psb_s_cuda_dnsg_mat_impl.F90} | 126 +-- ...z.F90 => psb_s_cuda_elg_allocate_mnnz.F90} | 10 +- ...b_c_elg_asb.f90 => psb_s_cuda_elg_asb.f90} | 8 +- ...s_elg_csmm.F90 => psb_s_cuda_elg_csmm.F90} | 12 +- ...s_elg_csmv.F90 => psb_s_cuda_elg_csmv.F90} | 10 +- ...elg_csput.F90 => psb_s_cuda_elg_csput.F90} | 32 +- ...om_gpu.F90 => psb_s_cuda_elg_from_gpu.F90} | 10 +- ...v.F90 => psb_s_cuda_elg_inner_vect_sv.F90} | 14 +- ...s_elg_mold.F90 => psb_s_cuda_elg_mold.F90} | 10 +- ...z.F90 => psb_s_cuda_elg_reallocate_nz.F90} | 12 +- ...s_elg_scal.F90 => psb_s_cuda_elg_scal.F90} | 10 +- ...elg_scals.F90 => psb_s_cuda_elg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_s_cuda_elg_to_gpu.F90} | 10 +- ...z_elg_trim.f90 => psb_s_cuda_elg_trim.f90} | 8 +- ...vect_mv.F90 => psb_s_cuda_elg_vect_mv.F90} | 18 +- ...iag_csmv.F90 => psb_s_cuda_hdiag_csmv.F90} | 12 +- ...iag_mold.F90 => psb_s_cuda_hdiag_mold.F90} | 10 +- ...to_gpu.F90 => psb_s_cuda_hdiag_to_gpu.F90} | 10 +- ...ct_mv.F90 => psb_s_cuda_hdiag_vect_mv.F90} | 18 +- ...z.F90 => psb_s_cuda_hlg_allocate_mnnz.F90} | 10 +- ...s_hlg_csmm.F90 => psb_s_cuda_hlg_csmm.F90} | 12 +- ...s_hlg_csmv.F90 => psb_s_cuda_hlg_csmv.F90} | 12 +- ...om_gpu.F90 => psb_s_cuda_hlg_from_gpu.F90} | 10 +- ...v.F90 => psb_s_cuda_hlg_inner_vect_sv.F90} | 12 +- ...s_hlg_mold.F90 => psb_s_cuda_hlg_mold.F90} | 10 +- ...z.F90 => psb_s_cuda_hlg_reallocate_nz.F90} | 12 +- ...s_hlg_scal.F90 => psb_s_cuda_hlg_scal.F90} | 10 +- ...hlg_scals.F90 => psb_s_cuda_hlg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_s_cuda_hlg_to_gpu.F90} | 10 +- ...vect_mv.F90 => psb_s_cuda_hlg_vect_mv.F90} | 18 +- ....F90 => psb_s_cuda_hybg_allocate_mnnz.F90} | 10 +- ...hybg_csmm.F90 => psb_s_cuda_hybg_csmm.F90} | 12 +- ...hybg_csmv.F90 => psb_s_cuda_hybg_csmv.F90} | 12 +- ....F90 => psb_s_cuda_hybg_inner_vect_sv.F90} | 18 +- ...hybg_mold.F90 => psb_s_cuda_hybg_mold.F90} | 10 +- ....F90 => psb_s_cuda_hybg_reallocate_nz.F90} | 12 +- ...hybg_scal.F90 => psb_s_cuda_hybg_scal.F90} | 10 +- ...bg_scals.F90 => psb_s_cuda_hybg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_s_cuda_hybg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_s_cuda_hybg_vect_mv.F90} | 18 +- ...oo.F90 => psb_s_cuda_mv_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_s_cuda_mv_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_mv_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_s_cuda_mv_elg_from_coo.F90} | 10 +- ...fmt.F90 => psb_s_cuda_mv_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_s_cuda_mv_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_s_cuda_mv_hlg_from_coo.F90} | 12 +- ...fmt.F90 => psb_s_cuda_mv_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_mv_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_s_cuda_mv_hybg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_cp_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_z_cuda_cp_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_cp_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_z_cuda_cp_elg_from_coo.F90} | 14 +- ...fmt.F90 => psb_z_cuda_cp_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_z_cuda_cp_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_z_cuda_cp_hlg_from_coo.F90} | 14 +- ...fmt.F90 => psb_z_cuda_cp_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_cp_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_z_cuda_cp_hybg_from_fmt.F90} | 10 +- ....F90 => psb_z_cuda_csrg_allocate_mnnz.F90} | 10 +- ...csrg_csmm.F90 => psb_z_cuda_csrg_csmm.F90} | 10 +- ...csrg_csmv.F90 => psb_z_cuda_csrg_csmv.F90} | 12 +- ...m_gpu.F90 => psb_z_cuda_csrg_from_gpu.F90} | 10 +- ....F90 => psb_z_cuda_csrg_inner_vect_sv.F90} | 18 +- ...csrg_mold.F90 => psb_z_cuda_csrg_mold.F90} | 10 +- ....F90 => psb_z_cuda_csrg_reallocate_nz.F90} | 12 +- ...csrg_scal.F90 => psb_z_cuda_csrg_scal.F90} | 10 +- ...rg_scals.F90 => psb_z_cuda_csrg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_z_cuda_csrg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_z_cuda_csrg_vect_mv.F90} | 18 +- ...diag_csmv.F90 => psb_z_cuda_diag_csmv.F90} | 12 +- ...diag_mold.F90 => psb_z_cuda_diag_mold.F90} | 10 +- ..._to_gpu.F90 => psb_z_cuda_diag_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_z_cuda_diag_vect_mv.F90} | 18 +- ..._impl.F90 => psb_z_cuda_dnsg_mat_impl.F90} | 126 +-- ...z.F90 => psb_z_cuda_elg_allocate_mnnz.F90} | 10 +- ...b_z_elg_asb.f90 => psb_z_cuda_elg_asb.f90} | 8 +- ...z_elg_csmm.F90 => psb_z_cuda_elg_csmm.F90} | 12 +- ...z_elg_csmv.F90 => psb_z_cuda_elg_csmv.F90} | 10 +- ...elg_csput.F90 => psb_z_cuda_elg_csput.F90} | 32 +- ...om_gpu.F90 => psb_z_cuda_elg_from_gpu.F90} | 10 +- ...v.F90 => psb_z_cuda_elg_inner_vect_sv.F90} | 14 +- ...z_elg_mold.F90 => psb_z_cuda_elg_mold.F90} | 10 +- ...z.F90 => psb_z_cuda_elg_reallocate_nz.F90} | 12 +- ...z_elg_scal.F90 => psb_z_cuda_elg_scal.F90} | 10 +- ...elg_scals.F90 => psb_z_cuda_elg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_z_cuda_elg_to_gpu.F90} | 10 +- ...c_elg_trim.f90 => psb_z_cuda_elg_trim.f90} | 8 +- ...vect_mv.F90 => psb_z_cuda_elg_vect_mv.F90} | 18 +- ...iag_csmv.F90 => psb_z_cuda_hdiag_csmv.F90} | 12 +- ...iag_mold.F90 => psb_z_cuda_hdiag_mold.F90} | 10 +- ...to_gpu.F90 => psb_z_cuda_hdiag_to_gpu.F90} | 10 +- ...ct_mv.F90 => psb_z_cuda_hdiag_vect_mv.F90} | 18 +- ...z.F90 => psb_z_cuda_hlg_allocate_mnnz.F90} | 10 +- ...z_hlg_csmm.F90 => psb_z_cuda_hlg_csmm.F90} | 12 +- ...z_hlg_csmv.F90 => psb_z_cuda_hlg_csmv.F90} | 12 +- ...om_gpu.F90 => psb_z_cuda_hlg_from_gpu.F90} | 10 +- ...v.F90 => psb_z_cuda_hlg_inner_vect_sv.F90} | 12 +- ...z_hlg_mold.F90 => psb_z_cuda_hlg_mold.F90} | 10 +- ...z.F90 => psb_z_cuda_hlg_reallocate_nz.F90} | 12 +- ...z_hlg_scal.F90 => psb_z_cuda_hlg_scal.F90} | 10 +- ...hlg_scals.F90 => psb_z_cuda_hlg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_z_cuda_hlg_to_gpu.F90} | 10 +- ...vect_mv.F90 => psb_z_cuda_hlg_vect_mv.F90} | 18 +- ....F90 => psb_z_cuda_hybg_allocate_mnnz.F90} | 10 +- ...hybg_csmm.F90 => psb_z_cuda_hybg_csmm.F90} | 12 +- ...hybg_csmv.F90 => psb_z_cuda_hybg_csmv.F90} | 12 +- ....F90 => psb_z_cuda_hybg_inner_vect_sv.F90} | 18 +- ...hybg_mold.F90 => psb_z_cuda_hybg_mold.F90} | 10 +- ....F90 => psb_z_cuda_hybg_reallocate_nz.F90} | 12 +- ...hybg_scal.F90 => psb_z_cuda_hybg_scal.F90} | 10 +- ...bg_scals.F90 => psb_z_cuda_hybg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_z_cuda_hybg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_z_cuda_hybg_vect_mv.F90} | 18 +- ...oo.F90 => psb_z_cuda_mv_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_z_cuda_mv_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_mv_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_z_cuda_mv_elg_from_coo.F90} | 10 +- ...fmt.F90 => psb_z_cuda_mv_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_z_cuda_mv_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_z_cuda_mv_hlg_from_coo.F90} | 12 +- ...fmt.F90 => psb_z_cuda_mv_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_mv_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_z_cuda_mv_hybg_from_fmt.F90} | 10 +- cuda/psb_c_csrg_mat_mod.F90 | 393 --------- cuda/psb_c_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_c_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_c_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_c_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_c_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_c_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_c_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_c_cuda_vect_mod.F90} | 772 +++++++++--------- ...b_gpu_env_mod.F90 => psb_cuda_env_mod.F90} | 120 +-- cuda/{psb_gpu_mod.F90 => psb_cuda_mod.F90} | 74 +- cuda/psb_d_csrg_mat_mod.F90 | 393 --------- cuda/psb_d_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_d_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_d_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_d_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_d_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_d_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_d_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_d_cuda_vect_mod.F90} | 772 +++++++++--------- cuda/psb_i_csrg_mat_mod.F90 | 393 --------- cuda/psb_i_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_i_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_i_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_i_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_i_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_i_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_i_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_i_cuda_vect_mod.F90} | 642 +++++++-------- cuda/psb_s_csrg_mat_mod.F90 | 393 --------- cuda/psb_s_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_s_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_s_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_s_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_s_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_s_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_s_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_s_cuda_vect_mod.F90} | 772 +++++++++--------- cuda/psb_z_csrg_mat_mod.F90 | 393 --------- cuda/psb_z_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_z_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_z_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_z_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_z_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_z_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_z_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_z_cuda_vect_mod.F90} | 772 +++++++++--------- test/{gpukern => cudakern}/Makefile | 0 test/{gpukern => cudakern}/c_file_spmv.F90 | 0 test/{gpukern => cudakern}/d_file_spmv.F90 | 0 test/{gpukern => cudakern}/data_input.f90 | 0 test/{gpukern => cudakern}/dpdegenmv.F90 | 16 +- test/{gpukern => cudakern}/s_file_spmv.F90 | 0 test/{gpukern => cudakern}/spdegenmv.F90 | 0 test/{gpukern => cudakern}/z_file_spmv.F90 | 0 357 files changed, 9566 insertions(+), 9566 deletions(-) rename cuda/impl/{psb_c_cp_csrg_from_coo.F90 => psb_c_cuda_cp_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_c_cp_csrg_from_fmt.F90 => psb_c_cuda_cp_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_c_cp_diag_from_coo.F90 => psb_c_cuda_cp_diag_from_coo.F90} (89%) rename cuda/impl/{psb_c_cp_elg_from_coo.F90 => psb_c_cuda_cp_elg_from_coo.F90} (94%) rename cuda/impl/{psb_c_cp_elg_from_fmt.F90 => psb_c_cuda_cp_elg_from_fmt.F90} (93%) rename cuda/impl/{psb_c_cp_hdiag_from_coo.F90 => psb_c_cuda_cp_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_c_cp_hlg_from_coo.F90 => psb_c_cuda_cp_hlg_from_coo.F90} (95%) rename cuda/impl/{psb_c_cp_hlg_from_fmt.F90 => psb_c_cuda_cp_hlg_from_fmt.F90} (90%) rename cuda/impl/{psb_c_cp_hybg_from_coo.F90 => psb_c_cuda_cp_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_c_cp_hybg_from_fmt.F90 => psb_c_cuda_cp_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_c_csrg_allocate_mnnz.F90 => psb_c_cuda_csrg_allocate_mnnz.F90} (89%) rename cuda/impl/{psb_c_csrg_csmm.F90 => psb_c_cuda_csrg_csmm.F90} (94%) rename cuda/impl/{psb_c_csrg_csmv.F90 => psb_c_cuda_csrg_csmv.F90} (93%) rename cuda/impl/{psb_c_csrg_from_gpu.F90 => psb_c_cuda_csrg_from_gpu.F90} (91%) rename cuda/impl/{psb_c_csrg_inner_vect_sv.F90 => psb_c_cuda_csrg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_c_csrg_mold.F90 => psb_c_cuda_csrg_mold.F90} (88%) rename cuda/impl/{psb_c_csrg_reallocate_nz.F90 => psb_c_cuda_csrg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_c_csrg_scal.F90 => psb_c_cuda_csrg_scal.F90} (90%) rename cuda/impl/{psb_c_csrg_scals.F90 => psb_c_cuda_csrg_scals.F90} (90%) rename cuda/impl/{psb_c_csrg_to_gpu.F90 => psb_c_cuda_csrg_to_gpu.F90} (98%) rename cuda/impl/{psb_c_csrg_vect_mv.F90 => psb_c_cuda_csrg_vect_mv.F90} (90%) rename cuda/impl/{psb_c_diag_csmv.F90 => psb_c_cuda_diag_csmv.F90} (92%) rename cuda/impl/{psb_c_diag_mold.F90 => psb_c_cuda_diag_mold.F90} (88%) rename cuda/impl/{psb_c_diag_to_gpu.F90 => psb_c_cuda_diag_to_gpu.F90} (91%) rename cuda/impl/{psb_c_diag_vect_mv.F90 => psb_c_cuda_diag_vect_mv.F90} (90%) rename cuda/impl/{psb_c_dnsg_mat_impl.F90 => psb_c_cuda_dnsg_mat_impl.F90} (77%) rename cuda/impl/{psb_c_elg_allocate_mnnz.F90 => psb_c_cuda_elg_allocate_mnnz.F90} (93%) rename cuda/impl/{psb_d_elg_asb.f90 => psb_c_cuda_elg_asb.f90} (92%) rename cuda/impl/{psb_c_elg_csmm.F90 => psb_c_cuda_elg_csmm.F90} (93%) rename cuda/impl/{psb_c_elg_csmv.F90 => psb_c_cuda_elg_csmv.F90} (94%) rename cuda/impl/{psb_c_elg_csput.F90 => psb_c_cuda_elg_csput.F90} (89%) rename cuda/impl/{psb_s_elg_from_gpu.F90 => psb_c_cuda_elg_from_gpu.F90} (91%) rename cuda/impl/{psb_c_elg_inner_vect_sv.F90 => psb_c_cuda_elg_inner_vect_sv.F90} (89%) rename cuda/impl/{psb_c_elg_mold.F90 => psb_c_cuda_elg_mold.F90} (89%) rename cuda/impl/{psb_c_elg_reallocate_nz.F90 => psb_c_cuda_elg_reallocate_nz.F90} (89%) rename cuda/impl/{psb_c_elg_scal.F90 => psb_c_cuda_elg_scal.F90} (91%) rename cuda/impl/{psb_c_elg_scals.F90 => psb_c_cuda_elg_scals.F90} (90%) rename cuda/impl/{psb_c_elg_to_gpu.F90 => psb_c_cuda_elg_to_gpu.F90} (93%) rename cuda/impl/{psb_d_elg_trim.f90 => psb_c_cuda_elg_trim.f90} (92%) rename cuda/impl/{psb_c_elg_vect_mv.F90 => psb_c_cuda_elg_vect_mv.F90} (91%) rename cuda/impl/{psb_c_hdiag_csmv.F90 => psb_c_cuda_hdiag_csmv.F90} (92%) rename cuda/impl/{psb_c_hdiag_mold.F90 => psb_c_cuda_hdiag_mold.F90} (88%) rename cuda/impl/{psb_c_hdiag_to_gpu.F90 => psb_c_cuda_hdiag_to_gpu.F90} (92%) rename cuda/impl/{psb_c_hdiag_vect_mv.F90 => psb_c_cuda_hdiag_vect_mv.F90} (90%) rename cuda/impl/{psb_c_hlg_allocate_mnnz.F90 => psb_c_cuda_hlg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_c_hlg_csmm.F90 => psb_c_cuda_hlg_csmm.F90} (93%) rename cuda/impl/{psb_c_hlg_csmv.F90 => psb_c_cuda_hlg_csmv.F90} (93%) rename cuda/impl/{psb_s_hlg_from_gpu.F90 => psb_c_cuda_hlg_from_gpu.F90} (92%) rename cuda/impl/{psb_c_hlg_inner_vect_sv.F90 => psb_c_cuda_hlg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_c_hlg_mold.F90 => psb_c_cuda_hlg_mold.F90} (89%) rename cuda/impl/{psb_c_hlg_reallocate_nz.F90 => psb_c_cuda_hlg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_c_hlg_scal.F90 => psb_c_cuda_hlg_scal.F90} (91%) rename cuda/impl/{psb_c_hlg_scals.F90 => psb_c_cuda_hlg_scals.F90} (91%) rename cuda/impl/{psb_c_hlg_to_gpu.F90 => psb_c_cuda_hlg_to_gpu.F90} (91%) rename cuda/impl/{psb_c_hlg_vect_mv.F90 => psb_c_cuda_hlg_vect_mv.F90} (91%) rename cuda/impl/{psb_c_hybg_allocate_mnnz.F90 => psb_c_cuda_hybg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_c_hybg_csmm.F90 => psb_c_cuda_hybg_csmm.F90} (93%) rename cuda/impl/{psb_c_hybg_csmv.F90 => psb_c_cuda_hybg_csmv.F90} (93%) rename cuda/impl/{psb_c_hybg_inner_vect_sv.F90 => psb_c_cuda_hybg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_c_hybg_mold.F90 => psb_c_cuda_hybg_mold.F90} (89%) rename cuda/impl/{psb_c_hybg_reallocate_nz.F90 => psb_c_cuda_hybg_reallocate_nz.F90} (88%) rename cuda/impl/{psb_c_hybg_scal.F90 => psb_c_cuda_hybg_scal.F90} (91%) rename cuda/impl/{psb_c_hybg_scals.F90 => psb_c_cuda_hybg_scals.F90} (91%) rename cuda/impl/{psb_c_hybg_to_gpu.F90 => psb_c_cuda_hybg_to_gpu.F90} (96%) rename cuda/impl/{psb_c_hybg_vect_mv.F90 => psb_c_cuda_hybg_vect_mv.F90} (91%) rename cuda/impl/{psb_c_mv_csrg_from_coo.F90 => psb_c_cuda_mv_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_c_mv_csrg_from_fmt.F90 => psb_c_cuda_mv_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_c_mv_diag_from_coo.F90 => psb_c_cuda_mv_diag_from_coo.F90} (89%) rename cuda/impl/{psb_c_mv_elg_from_coo.F90 => psb_c_cuda_mv_elg_from_coo.F90} (89%) rename cuda/impl/{psb_c_mv_elg_from_fmt.F90 => psb_c_cuda_mv_elg_from_fmt.F90} (92%) rename cuda/impl/{psb_c_mv_hdiag_from_coo.F90 => psb_c_cuda_mv_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_c_mv_hlg_from_coo.F90 => psb_c_cuda_mv_hlg_from_coo.F90} (88%) rename cuda/impl/{psb_c_mv_hlg_from_fmt.F90 => psb_c_cuda_mv_hlg_from_fmt.F90} (89%) rename cuda/impl/{psb_c_mv_hybg_from_coo.F90 => psb_c_cuda_mv_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_c_mv_hybg_from_fmt.F90 => psb_c_cuda_mv_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_cp_csrg_from_coo.F90 => psb_d_cuda_cp_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_d_cp_csrg_from_fmt.F90 => psb_d_cuda_cp_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_cp_diag_from_coo.F90 => psb_d_cuda_cp_diag_from_coo.F90} (89%) rename cuda/impl/{psb_d_cp_elg_from_coo.F90 => psb_d_cuda_cp_elg_from_coo.F90} (94%) rename cuda/impl/{psb_d_cp_elg_from_fmt.F90 => psb_d_cuda_cp_elg_from_fmt.F90} (93%) rename cuda/impl/{psb_d_cp_hdiag_from_coo.F90 => psb_d_cuda_cp_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_d_cp_hlg_from_coo.F90 => psb_d_cuda_cp_hlg_from_coo.F90} (95%) rename cuda/impl/{psb_d_cp_hlg_from_fmt.F90 => psb_d_cuda_cp_hlg_from_fmt.F90} (90%) rename cuda/impl/{psb_d_cp_hybg_from_coo.F90 => psb_d_cuda_cp_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_d_cp_hybg_from_fmt.F90 => psb_d_cuda_cp_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_csrg_allocate_mnnz.F90 => psb_d_cuda_csrg_allocate_mnnz.F90} (89%) rename cuda/impl/{psb_d_csrg_csmm.F90 => psb_d_cuda_csrg_csmm.F90} (94%) rename cuda/impl/{psb_d_csrg_csmv.F90 => psb_d_cuda_csrg_csmv.F90} (93%) rename cuda/impl/{psb_s_csrg_from_gpu.F90 => psb_d_cuda_csrg_from_gpu.F90} (91%) rename cuda/impl/{psb_d_csrg_inner_vect_sv.F90 => psb_d_cuda_csrg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_d_csrg_mold.F90 => psb_d_cuda_csrg_mold.F90} (88%) rename cuda/impl/{psb_d_csrg_reallocate_nz.F90 => psb_d_cuda_csrg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_d_csrg_scal.F90 => psb_d_cuda_csrg_scal.F90} (90%) rename cuda/impl/{psb_d_csrg_scals.F90 => psb_d_cuda_csrg_scals.F90} (90%) rename cuda/impl/{psb_d_csrg_to_gpu.F90 => psb_d_cuda_csrg_to_gpu.F90} (98%) rename cuda/impl/{psb_d_csrg_vect_mv.F90 => psb_d_cuda_csrg_vect_mv.F90} (90%) rename cuda/impl/{psb_d_diag_csmv.F90 => psb_d_cuda_diag_csmv.F90} (92%) rename cuda/impl/{psb_d_diag_mold.F90 => psb_d_cuda_diag_mold.F90} (88%) rename cuda/impl/{psb_d_diag_to_gpu.F90 => psb_d_cuda_diag_to_gpu.F90} (91%) rename cuda/impl/{psb_d_diag_vect_mv.F90 => psb_d_cuda_diag_vect_mv.F90} (90%) rename cuda/impl/{psb_d_dnsg_mat_impl.F90 => psb_d_cuda_dnsg_mat_impl.F90} (77%) rename cuda/impl/{psb_d_elg_allocate_mnnz.F90 => psb_d_cuda_elg_allocate_mnnz.F90} (93%) rename cuda/impl/{psb_s_elg_asb.f90 => psb_d_cuda_elg_asb.f90} (92%) rename cuda/impl/{psb_d_elg_csmm.F90 => psb_d_cuda_elg_csmm.F90} (93%) rename cuda/impl/{psb_d_elg_csmv.F90 => psb_d_cuda_elg_csmv.F90} (94%) rename cuda/impl/{psb_d_elg_csput.F90 => psb_d_cuda_elg_csput.F90} (89%) rename cuda/impl/{psb_d_elg_from_gpu.F90 => psb_d_cuda_elg_from_gpu.F90} (91%) rename cuda/impl/{psb_d_elg_inner_vect_sv.F90 => psb_d_cuda_elg_inner_vect_sv.F90} (89%) rename cuda/impl/{psb_d_elg_mold.F90 => psb_d_cuda_elg_mold.F90} (89%) rename cuda/impl/{psb_d_elg_reallocate_nz.F90 => psb_d_cuda_elg_reallocate_nz.F90} (89%) rename cuda/impl/{psb_d_elg_scal.F90 => psb_d_cuda_elg_scal.F90} (91%) rename cuda/impl/{psb_d_elg_scals.F90 => psb_d_cuda_elg_scals.F90} (90%) rename cuda/impl/{psb_d_elg_to_gpu.F90 => psb_d_cuda_elg_to_gpu.F90} (93%) rename cuda/impl/{psb_s_elg_trim.f90 => psb_d_cuda_elg_trim.f90} (92%) rename cuda/impl/{psb_d_elg_vect_mv.F90 => psb_d_cuda_elg_vect_mv.F90} (91%) rename cuda/impl/{psb_d_hdiag_csmv.F90 => psb_d_cuda_hdiag_csmv.F90} (92%) rename cuda/impl/{psb_d_hdiag_mold.F90 => psb_d_cuda_hdiag_mold.F90} (88%) rename cuda/impl/{psb_s_hdiag_to_gpu.F90 => psb_d_cuda_hdiag_to_gpu.F90} (92%) rename cuda/impl/{psb_d_hdiag_vect_mv.F90 => psb_d_cuda_hdiag_vect_mv.F90} (90%) rename cuda/impl/{psb_d_hlg_allocate_mnnz.F90 => psb_d_cuda_hlg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_d_hlg_csmm.F90 => psb_d_cuda_hlg_csmm.F90} (93%) rename cuda/impl/{psb_d_hlg_csmv.F90 => psb_d_cuda_hlg_csmv.F90} (93%) rename cuda/impl/{psb_d_hlg_from_gpu.F90 => psb_d_cuda_hlg_from_gpu.F90} (92%) rename cuda/impl/{psb_d_hlg_inner_vect_sv.F90 => psb_d_cuda_hlg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_d_hlg_mold.F90 => psb_d_cuda_hlg_mold.F90} (89%) rename cuda/impl/{psb_d_hlg_reallocate_nz.F90 => psb_d_cuda_hlg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_d_hlg_scal.F90 => psb_d_cuda_hlg_scal.F90} (91%) rename cuda/impl/{psb_d_hlg_scals.F90 => psb_d_cuda_hlg_scals.F90} (91%) rename cuda/impl/{psb_d_hlg_to_gpu.F90 => psb_d_cuda_hlg_to_gpu.F90} (91%) rename cuda/impl/{psb_d_hlg_vect_mv.F90 => psb_d_cuda_hlg_vect_mv.F90} (91%) rename cuda/impl/{psb_d_hybg_allocate_mnnz.F90 => psb_d_cuda_hybg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_d_hybg_csmm.F90 => psb_d_cuda_hybg_csmm.F90} (93%) rename cuda/impl/{psb_d_hybg_csmv.F90 => psb_d_cuda_hybg_csmv.F90} (93%) rename cuda/impl/{psb_d_hybg_inner_vect_sv.F90 => psb_d_cuda_hybg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_d_hybg_mold.F90 => psb_d_cuda_hybg_mold.F90} (89%) rename cuda/impl/{psb_d_hybg_reallocate_nz.F90 => psb_d_cuda_hybg_reallocate_nz.F90} (88%) rename cuda/impl/{psb_d_hybg_scal.F90 => psb_d_cuda_hybg_scal.F90} (91%) rename cuda/impl/{psb_d_hybg_scals.F90 => psb_d_cuda_hybg_scals.F90} (91%) rename cuda/impl/{psb_d_hybg_to_gpu.F90 => psb_d_cuda_hybg_to_gpu.F90} (96%) rename cuda/impl/{psb_d_hybg_vect_mv.F90 => psb_d_cuda_hybg_vect_mv.F90} (90%) rename cuda/impl/{psb_d_mv_csrg_from_coo.F90 => psb_d_cuda_mv_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_d_mv_csrg_from_fmt.F90 => psb_d_cuda_mv_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_mv_diag_from_coo.F90 => psb_d_cuda_mv_diag_from_coo.F90} (89%) rename cuda/impl/{psb_d_mv_elg_from_coo.F90 => psb_d_cuda_mv_elg_from_coo.F90} (89%) rename cuda/impl/{psb_d_mv_elg_from_fmt.F90 => psb_d_cuda_mv_elg_from_fmt.F90} (92%) rename cuda/impl/{psb_d_mv_hdiag_from_coo.F90 => psb_d_cuda_mv_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_d_mv_hlg_from_coo.F90 => psb_d_cuda_mv_hlg_from_coo.F90} (88%) rename cuda/impl/{psb_d_mv_hlg_from_fmt.F90 => psb_d_cuda_mv_hlg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_mv_hybg_from_coo.F90 => psb_d_cuda_mv_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_d_mv_hybg_from_fmt.F90 => psb_d_cuda_mv_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_cp_csrg_from_coo.F90 => psb_s_cuda_cp_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_s_cp_csrg_from_fmt.F90 => psb_s_cuda_cp_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_cp_diag_from_coo.F90 => psb_s_cuda_cp_diag_from_coo.F90} (89%) rename cuda/impl/{psb_s_cp_elg_from_coo.F90 => psb_s_cuda_cp_elg_from_coo.F90} (94%) rename cuda/impl/{psb_s_cp_elg_from_fmt.F90 => psb_s_cuda_cp_elg_from_fmt.F90} (93%) rename cuda/impl/{psb_s_cp_hdiag_from_coo.F90 => psb_s_cuda_cp_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_s_cp_hlg_from_coo.F90 => psb_s_cuda_cp_hlg_from_coo.F90} (95%) rename cuda/impl/{psb_s_cp_hlg_from_fmt.F90 => psb_s_cuda_cp_hlg_from_fmt.F90} (90%) rename cuda/impl/{psb_s_cp_hybg_from_coo.F90 => psb_s_cuda_cp_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_s_cp_hybg_from_fmt.F90 => psb_s_cuda_cp_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_csrg_allocate_mnnz.F90 => psb_s_cuda_csrg_allocate_mnnz.F90} (89%) rename cuda/impl/{psb_s_csrg_csmm.F90 => psb_s_cuda_csrg_csmm.F90} (94%) rename cuda/impl/{psb_s_csrg_csmv.F90 => psb_s_cuda_csrg_csmv.F90} (93%) rename cuda/impl/{psb_z_csrg_from_gpu.F90 => psb_s_cuda_csrg_from_gpu.F90} (91%) rename cuda/impl/{psb_s_csrg_inner_vect_sv.F90 => psb_s_cuda_csrg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_s_csrg_mold.F90 => psb_s_cuda_csrg_mold.F90} (88%) rename cuda/impl/{psb_s_csrg_reallocate_nz.F90 => psb_s_cuda_csrg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_s_csrg_scal.F90 => psb_s_cuda_csrg_scal.F90} (90%) rename cuda/impl/{psb_s_csrg_scals.F90 => psb_s_cuda_csrg_scals.F90} (90%) rename cuda/impl/{psb_s_csrg_to_gpu.F90 => psb_s_cuda_csrg_to_gpu.F90} (98%) rename cuda/impl/{psb_s_csrg_vect_mv.F90 => psb_s_cuda_csrg_vect_mv.F90} (90%) rename cuda/impl/{psb_s_diag_csmv.F90 => psb_s_cuda_diag_csmv.F90} (92%) rename cuda/impl/{psb_s_diag_mold.F90 => psb_s_cuda_diag_mold.F90} (88%) rename cuda/impl/{psb_s_diag_to_gpu.F90 => psb_s_cuda_diag_to_gpu.F90} (91%) rename cuda/impl/{psb_s_diag_vect_mv.F90 => psb_s_cuda_diag_vect_mv.F90} (90%) rename cuda/impl/{psb_s_dnsg_mat_impl.F90 => psb_s_cuda_dnsg_mat_impl.F90} (77%) rename cuda/impl/{psb_s_elg_allocate_mnnz.F90 => psb_s_cuda_elg_allocate_mnnz.F90} (93%) rename cuda/impl/{psb_c_elg_asb.f90 => psb_s_cuda_elg_asb.f90} (92%) rename cuda/impl/{psb_s_elg_csmm.F90 => psb_s_cuda_elg_csmm.F90} (93%) rename cuda/impl/{psb_s_elg_csmv.F90 => psb_s_cuda_elg_csmv.F90} (94%) rename cuda/impl/{psb_s_elg_csput.F90 => psb_s_cuda_elg_csput.F90} (89%) rename cuda/impl/{psb_c_elg_from_gpu.F90 => psb_s_cuda_elg_from_gpu.F90} (91%) rename cuda/impl/{psb_s_elg_inner_vect_sv.F90 => psb_s_cuda_elg_inner_vect_sv.F90} (89%) rename cuda/impl/{psb_s_elg_mold.F90 => psb_s_cuda_elg_mold.F90} (89%) rename cuda/impl/{psb_s_elg_reallocate_nz.F90 => psb_s_cuda_elg_reallocate_nz.F90} (89%) rename cuda/impl/{psb_s_elg_scal.F90 => psb_s_cuda_elg_scal.F90} (91%) rename cuda/impl/{psb_s_elg_scals.F90 => psb_s_cuda_elg_scals.F90} (90%) rename cuda/impl/{psb_s_elg_to_gpu.F90 => psb_s_cuda_elg_to_gpu.F90} (93%) rename cuda/impl/{psb_z_elg_trim.f90 => psb_s_cuda_elg_trim.f90} (92%) rename cuda/impl/{psb_s_elg_vect_mv.F90 => psb_s_cuda_elg_vect_mv.F90} (91%) rename cuda/impl/{psb_s_hdiag_csmv.F90 => psb_s_cuda_hdiag_csmv.F90} (92%) rename cuda/impl/{psb_s_hdiag_mold.F90 => psb_s_cuda_hdiag_mold.F90} (88%) rename cuda/impl/{psb_z_hdiag_to_gpu.F90 => psb_s_cuda_hdiag_to_gpu.F90} (92%) rename cuda/impl/{psb_s_hdiag_vect_mv.F90 => psb_s_cuda_hdiag_vect_mv.F90} (90%) rename cuda/impl/{psb_s_hlg_allocate_mnnz.F90 => psb_s_cuda_hlg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_s_hlg_csmm.F90 => psb_s_cuda_hlg_csmm.F90} (93%) rename cuda/impl/{psb_s_hlg_csmv.F90 => psb_s_cuda_hlg_csmv.F90} (93%) rename cuda/impl/{psb_c_hlg_from_gpu.F90 => psb_s_cuda_hlg_from_gpu.F90} (92%) rename cuda/impl/{psb_s_hlg_inner_vect_sv.F90 => psb_s_cuda_hlg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_s_hlg_mold.F90 => psb_s_cuda_hlg_mold.F90} (89%) rename cuda/impl/{psb_s_hlg_reallocate_nz.F90 => psb_s_cuda_hlg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_s_hlg_scal.F90 => psb_s_cuda_hlg_scal.F90} (91%) rename cuda/impl/{psb_s_hlg_scals.F90 => psb_s_cuda_hlg_scals.F90} (91%) rename cuda/impl/{psb_s_hlg_to_gpu.F90 => psb_s_cuda_hlg_to_gpu.F90} (91%) rename cuda/impl/{psb_s_hlg_vect_mv.F90 => psb_s_cuda_hlg_vect_mv.F90} (91%) rename cuda/impl/{psb_s_hybg_allocate_mnnz.F90 => psb_s_cuda_hybg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_s_hybg_csmm.F90 => psb_s_cuda_hybg_csmm.F90} (93%) rename cuda/impl/{psb_s_hybg_csmv.F90 => psb_s_cuda_hybg_csmv.F90} (93%) rename cuda/impl/{psb_s_hybg_inner_vect_sv.F90 => psb_s_cuda_hybg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_s_hybg_mold.F90 => psb_s_cuda_hybg_mold.F90} (89%) rename cuda/impl/{psb_s_hybg_reallocate_nz.F90 => psb_s_cuda_hybg_reallocate_nz.F90} (88%) rename cuda/impl/{psb_s_hybg_scal.F90 => psb_s_cuda_hybg_scal.F90} (91%) rename cuda/impl/{psb_s_hybg_scals.F90 => psb_s_cuda_hybg_scals.F90} (91%) rename cuda/impl/{psb_s_hybg_to_gpu.F90 => psb_s_cuda_hybg_to_gpu.F90} (96%) rename cuda/impl/{psb_s_hybg_vect_mv.F90 => psb_s_cuda_hybg_vect_mv.F90} (90%) rename cuda/impl/{psb_s_mv_csrg_from_coo.F90 => psb_s_cuda_mv_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_s_mv_csrg_from_fmt.F90 => psb_s_cuda_mv_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_mv_diag_from_coo.F90 => psb_s_cuda_mv_diag_from_coo.F90} (89%) rename cuda/impl/{psb_s_mv_elg_from_coo.F90 => psb_s_cuda_mv_elg_from_coo.F90} (89%) rename cuda/impl/{psb_s_mv_elg_from_fmt.F90 => psb_s_cuda_mv_elg_from_fmt.F90} (92%) rename cuda/impl/{psb_s_mv_hdiag_from_coo.F90 => psb_s_cuda_mv_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_s_mv_hlg_from_coo.F90 => psb_s_cuda_mv_hlg_from_coo.F90} (88%) rename cuda/impl/{psb_s_mv_hlg_from_fmt.F90 => psb_s_cuda_mv_hlg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_mv_hybg_from_coo.F90 => psb_s_cuda_mv_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_s_mv_hybg_from_fmt.F90 => psb_s_cuda_mv_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_cp_csrg_from_coo.F90 => psb_z_cuda_cp_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_z_cp_csrg_from_fmt.F90 => psb_z_cuda_cp_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_cp_diag_from_coo.F90 => psb_z_cuda_cp_diag_from_coo.F90} (89%) rename cuda/impl/{psb_z_cp_elg_from_coo.F90 => psb_z_cuda_cp_elg_from_coo.F90} (94%) rename cuda/impl/{psb_z_cp_elg_from_fmt.F90 => psb_z_cuda_cp_elg_from_fmt.F90} (93%) rename cuda/impl/{psb_z_cp_hdiag_from_coo.F90 => psb_z_cuda_cp_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_z_cp_hlg_from_coo.F90 => psb_z_cuda_cp_hlg_from_coo.F90} (95%) rename cuda/impl/{psb_z_cp_hlg_from_fmt.F90 => psb_z_cuda_cp_hlg_from_fmt.F90} (90%) rename cuda/impl/{psb_z_cp_hybg_from_coo.F90 => psb_z_cuda_cp_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_z_cp_hybg_from_fmt.F90 => psb_z_cuda_cp_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_csrg_allocate_mnnz.F90 => psb_z_cuda_csrg_allocate_mnnz.F90} (89%) rename cuda/impl/{psb_z_csrg_csmm.F90 => psb_z_cuda_csrg_csmm.F90} (94%) rename cuda/impl/{psb_z_csrg_csmv.F90 => psb_z_cuda_csrg_csmv.F90} (93%) rename cuda/impl/{psb_d_csrg_from_gpu.F90 => psb_z_cuda_csrg_from_gpu.F90} (91%) rename cuda/impl/{psb_z_csrg_inner_vect_sv.F90 => psb_z_cuda_csrg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_z_csrg_mold.F90 => psb_z_cuda_csrg_mold.F90} (88%) rename cuda/impl/{psb_z_csrg_reallocate_nz.F90 => psb_z_cuda_csrg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_z_csrg_scal.F90 => psb_z_cuda_csrg_scal.F90} (90%) rename cuda/impl/{psb_z_csrg_scals.F90 => psb_z_cuda_csrg_scals.F90} (90%) rename cuda/impl/{psb_z_csrg_to_gpu.F90 => psb_z_cuda_csrg_to_gpu.F90} (98%) rename cuda/impl/{psb_z_csrg_vect_mv.F90 => psb_z_cuda_csrg_vect_mv.F90} (90%) rename cuda/impl/{psb_z_diag_csmv.F90 => psb_z_cuda_diag_csmv.F90} (92%) rename cuda/impl/{psb_z_diag_mold.F90 => psb_z_cuda_diag_mold.F90} (88%) rename cuda/impl/{psb_z_diag_to_gpu.F90 => psb_z_cuda_diag_to_gpu.F90} (91%) rename cuda/impl/{psb_z_diag_vect_mv.F90 => psb_z_cuda_diag_vect_mv.F90} (90%) rename cuda/impl/{psb_z_dnsg_mat_impl.F90 => psb_z_cuda_dnsg_mat_impl.F90} (77%) rename cuda/impl/{psb_z_elg_allocate_mnnz.F90 => psb_z_cuda_elg_allocate_mnnz.F90} (93%) rename cuda/impl/{psb_z_elg_asb.f90 => psb_z_cuda_elg_asb.f90} (92%) rename cuda/impl/{psb_z_elg_csmm.F90 => psb_z_cuda_elg_csmm.F90} (93%) rename cuda/impl/{psb_z_elg_csmv.F90 => psb_z_cuda_elg_csmv.F90} (94%) rename cuda/impl/{psb_z_elg_csput.F90 => psb_z_cuda_elg_csput.F90} (89%) rename cuda/impl/{psb_z_elg_from_gpu.F90 => psb_z_cuda_elg_from_gpu.F90} (91%) rename cuda/impl/{psb_z_elg_inner_vect_sv.F90 => psb_z_cuda_elg_inner_vect_sv.F90} (89%) rename cuda/impl/{psb_z_elg_mold.F90 => psb_z_cuda_elg_mold.F90} (89%) rename cuda/impl/{psb_z_elg_reallocate_nz.F90 => psb_z_cuda_elg_reallocate_nz.F90} (89%) rename cuda/impl/{psb_z_elg_scal.F90 => psb_z_cuda_elg_scal.F90} (91%) rename cuda/impl/{psb_z_elg_scals.F90 => psb_z_cuda_elg_scals.F90} (90%) rename cuda/impl/{psb_z_elg_to_gpu.F90 => psb_z_cuda_elg_to_gpu.F90} (93%) rename cuda/impl/{psb_c_elg_trim.f90 => psb_z_cuda_elg_trim.f90} (92%) rename cuda/impl/{psb_z_elg_vect_mv.F90 => psb_z_cuda_elg_vect_mv.F90} (91%) rename cuda/impl/{psb_z_hdiag_csmv.F90 => psb_z_cuda_hdiag_csmv.F90} (92%) rename cuda/impl/{psb_z_hdiag_mold.F90 => psb_z_cuda_hdiag_mold.F90} (88%) rename cuda/impl/{psb_d_hdiag_to_gpu.F90 => psb_z_cuda_hdiag_to_gpu.F90} (92%) rename cuda/impl/{psb_z_hdiag_vect_mv.F90 => psb_z_cuda_hdiag_vect_mv.F90} (90%) rename cuda/impl/{psb_z_hlg_allocate_mnnz.F90 => psb_z_cuda_hlg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_z_hlg_csmm.F90 => psb_z_cuda_hlg_csmm.F90} (93%) rename cuda/impl/{psb_z_hlg_csmv.F90 => psb_z_cuda_hlg_csmv.F90} (93%) rename cuda/impl/{psb_z_hlg_from_gpu.F90 => psb_z_cuda_hlg_from_gpu.F90} (92%) rename cuda/impl/{psb_z_hlg_inner_vect_sv.F90 => psb_z_cuda_hlg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_z_hlg_mold.F90 => psb_z_cuda_hlg_mold.F90} (89%) rename cuda/impl/{psb_z_hlg_reallocate_nz.F90 => psb_z_cuda_hlg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_z_hlg_scal.F90 => psb_z_cuda_hlg_scal.F90} (91%) rename cuda/impl/{psb_z_hlg_scals.F90 => psb_z_cuda_hlg_scals.F90} (91%) rename cuda/impl/{psb_z_hlg_to_gpu.F90 => psb_z_cuda_hlg_to_gpu.F90} (91%) rename cuda/impl/{psb_z_hlg_vect_mv.F90 => psb_z_cuda_hlg_vect_mv.F90} (91%) rename cuda/impl/{psb_z_hybg_allocate_mnnz.F90 => psb_z_cuda_hybg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_z_hybg_csmm.F90 => psb_z_cuda_hybg_csmm.F90} (93%) rename cuda/impl/{psb_z_hybg_csmv.F90 => psb_z_cuda_hybg_csmv.F90} (93%) rename cuda/impl/{psb_z_hybg_inner_vect_sv.F90 => psb_z_cuda_hybg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_z_hybg_mold.F90 => psb_z_cuda_hybg_mold.F90} (89%) rename cuda/impl/{psb_z_hybg_reallocate_nz.F90 => psb_z_cuda_hybg_reallocate_nz.F90} (88%) rename cuda/impl/{psb_z_hybg_scal.F90 => psb_z_cuda_hybg_scal.F90} (91%) rename cuda/impl/{psb_z_hybg_scals.F90 => psb_z_cuda_hybg_scals.F90} (91%) rename cuda/impl/{psb_z_hybg_to_gpu.F90 => psb_z_cuda_hybg_to_gpu.F90} (96%) rename cuda/impl/{psb_z_hybg_vect_mv.F90 => psb_z_cuda_hybg_vect_mv.F90} (91%) rename cuda/impl/{psb_z_mv_csrg_from_coo.F90 => psb_z_cuda_mv_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_z_mv_csrg_from_fmt.F90 => psb_z_cuda_mv_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_mv_diag_from_coo.F90 => psb_z_cuda_mv_diag_from_coo.F90} (89%) rename cuda/impl/{psb_z_mv_elg_from_coo.F90 => psb_z_cuda_mv_elg_from_coo.F90} (89%) rename cuda/impl/{psb_z_mv_elg_from_fmt.F90 => psb_z_cuda_mv_elg_from_fmt.F90} (92%) rename cuda/impl/{psb_z_mv_hdiag_from_coo.F90 => psb_z_cuda_mv_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_z_mv_hlg_from_coo.F90 => psb_z_cuda_mv_hlg_from_coo.F90} (88%) rename cuda/impl/{psb_z_mv_hlg_from_fmt.F90 => psb_z_cuda_mv_hlg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_mv_hybg_from_coo.F90 => psb_z_cuda_mv_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_z_mv_hybg_from_fmt.F90 => psb_z_cuda_mv_hybg_from_fmt.F90} (89%) delete mode 100644 cuda/psb_c_csrg_mat_mod.F90 create mode 100644 cuda/psb_c_cuda_csrg_mat_mod.F90 rename cuda/{psb_c_diag_mat_mod.F90 => psb_c_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_c_dnsg_mat_mod.F90 => psb_c_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_c_elg_mat_mod.F90 => psb_c_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_c_hdiag_mat_mod.F90 => psb_c_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_c_hlg_mat_mod.F90 => psb_c_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_c_hybg_mat_mod.F90 => psb_c_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_c_gpu_vect_mod.F90 => psb_c_cuda_vect_mod.F90} (72%) rename cuda/{psb_gpu_env_mod.F90 => psb_cuda_env_mod.F90} (74%) rename cuda/{psb_gpu_mod.F90 => psb_cuda_mod.F90} (65%) delete mode 100644 cuda/psb_d_csrg_mat_mod.F90 create mode 100644 cuda/psb_d_cuda_csrg_mat_mod.F90 rename cuda/{psb_d_diag_mat_mod.F90 => psb_d_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_d_dnsg_mat_mod.F90 => psb_d_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_d_elg_mat_mod.F90 => psb_d_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_d_hdiag_mat_mod.F90 => psb_d_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_d_hlg_mat_mod.F90 => psb_d_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_d_hybg_mat_mod.F90 => psb_d_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_d_gpu_vect_mod.F90 => psb_d_cuda_vect_mod.F90} (72%) delete mode 100644 cuda/psb_i_csrg_mat_mod.F90 create mode 100644 cuda/psb_i_cuda_csrg_mat_mod.F90 rename cuda/{psb_i_diag_mat_mod.F90 => psb_i_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_i_dnsg_mat_mod.F90 => psb_i_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_i_elg_mat_mod.F90 => psb_i_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_i_hdiag_mat_mod.F90 => psb_i_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_i_hlg_mat_mod.F90 => psb_i_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_i_hybg_mat_mod.F90 => psb_i_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_i_gpu_vect_mod.F90 => psb_i_cuda_vect_mod.F90} (72%) delete mode 100644 cuda/psb_s_csrg_mat_mod.F90 create mode 100644 cuda/psb_s_cuda_csrg_mat_mod.F90 rename cuda/{psb_s_diag_mat_mod.F90 => psb_s_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_s_dnsg_mat_mod.F90 => psb_s_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_s_elg_mat_mod.F90 => psb_s_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_s_hdiag_mat_mod.F90 => psb_s_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_s_hlg_mat_mod.F90 => psb_s_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_s_hybg_mat_mod.F90 => psb_s_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_s_gpu_vect_mod.F90 => psb_s_cuda_vect_mod.F90} (72%) delete mode 100644 cuda/psb_z_csrg_mat_mod.F90 create mode 100644 cuda/psb_z_cuda_csrg_mat_mod.F90 rename cuda/{psb_z_diag_mat_mod.F90 => psb_z_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_z_dnsg_mat_mod.F90 => psb_z_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_z_elg_mat_mod.F90 => psb_z_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_z_hdiag_mat_mod.F90 => psb_z_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_z_hlg_mat_mod.F90 => psb_z_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_z_hybg_mat_mod.F90 => psb_z_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_z_gpu_vect_mod.F90 => psb_z_cuda_vect_mod.F90} (72%) rename test/{gpukern => cudakern}/Makefile (100%) rename test/{gpukern => cudakern}/c_file_spmv.F90 (100%) rename test/{gpukern => cudakern}/d_file_spmv.F90 (100%) rename test/{gpukern => cudakern}/data_input.f90 (100%) rename test/{gpukern => cudakern}/dpdegenmv.F90 (99%) rename test/{gpukern => cudakern}/s_file_spmv.F90 (100%) rename test/{gpukern => cudakern}/spdegenmv.F90 (100%) rename test/{gpukern => cudakern}/z_file_spmv.F90 (100%) diff --git a/cuda/Makefile b/cuda/Makefile index 2b0c011a..0f03e359 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -23,22 +23,22 @@ FOBJS=cusparse_mod.o base_cusparse_mod.o \ psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\ psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \ elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \ - psb_i_gpu_vect_mod.o \ - psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ - psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ - psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ - psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ - psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ - psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ - psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ - psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ - psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ - psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ - psb_gpu_env_mod.o psb_gpu_mod.o \ - psb_d_diag_mat_mod.o\ - psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ - psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ - psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o \ + psb_i_cuda_vect_mod.o \ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_cuda_env_mod.o psb_cuda_mod.o \ + psb_d_cuda_diag_mat_mod.o\ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o \ dnsdev_mod.o COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\ @@ -65,30 +65,30 @@ lib: ilib cudalib spgpulib /bin/cp -p $(LIBNAME) $(LIBDIR) dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o -psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o: psb_i_gpu_vect_mod.o -psb_i_gpu_vect_mod.o : psb_vectordev_mod.o psb_gpu_env_mod.o +psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o +psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o -psb_d_hlg_mat_mod.o: hlldev_mod.o psb_d_gpu_vect_mod.o psb_gpu_env_mod.o -psb_d_elg_mat_mod.o: elldev_mod.o psb_d_gpu_vect_mod.o -psb_d_diag_mat_mod.o: diagdev_mod.o psb_d_gpu_vect_mod.o -psb_d_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_gpu_vect_mod.o -psb_s_dnsg_mat_mod.o: dnsdev_mod.o psb_s_gpu_vect_mod.o -psb_d_dnsg_mat_mod.o: dnsdev_mod.o psb_d_gpu_vect_mod.o -psb_c_dnsg_mat_mod.o: dnsdev_mod.o psb_c_gpu_vect_mod.o -psb_z_dnsg_mat_mod.o: dnsdev_mod.o psb_z_gpu_vect_mod.o -psb_s_hlg_mat_mod.o: hlldev_mod.o psb_s_gpu_vect_mod.o psb_gpu_env_mod.o -psb_s_elg_mat_mod.o: elldev_mod.o psb_s_gpu_vect_mod.o -psb_s_diag_mat_mod.o: diagdev_mod.o psb_s_gpu_vect_mod.o -psb_s_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_gpu_vect_mod.o -psb_s_csrg_mat_mod.o psb_s_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o -psb_d_csrg_mat_mod.o psb_d_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o -psb_z_hlg_mat_mod.o: hlldev_mod.o psb_z_gpu_vect_mod.o psb_gpu_env_mod.o -psb_z_elg_mat_mod.o: elldev_mod.o psb_z_gpu_vect_mod.o -psb_c_hlg_mat_mod.o: hlldev_mod.o psb_c_gpu_vect_mod.o psb_gpu_env_mod.o -psb_c_elg_mat_mod.o: elldev_mod.o psb_c_gpu_vect_mod.o -psb_c_csrg_mat_mod.o psb_c_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o -psb_z_csrg_mat_mod.o psb_z_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_cuda_hlg_mat_mod.o: hlldev_mod.o psb_d_cuda_vect_mod.o psb_cuda_env_mod.o +psb_d_cuda_elg_mat_mod.o: elldev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_diag_mat_mod.o: diagdev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_cuda_vect_mod.o +psb_s_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_s_cuda_vect_mod.o +psb_d_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_d_cuda_vect_mod.o +psb_c_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_c_cuda_vect_mod.o +psb_z_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_z_cuda_vect_mod.o +psb_s_cuda_hlg_mat_mod.o: hlldev_mod.o psb_s_cuda_vect_mod.o psb_cuda_env_mod.o +psb_s_cuda_elg_mat_mod.o: elldev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_diag_mat_mod.o: diagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_csrg_mat_mod.o psb_s_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_cuda_csrg_mat_mod.o psb_d_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_hlg_mat_mod.o: hlldev_mod.o psb_z_cuda_vect_mod.o psb_cuda_env_mod.o +psb_z_cuda_elg_mat_mod.o: elldev_mod.o psb_z_cuda_vect_mod.o +psb_c_cuda_hlg_mat_mod.o: hlldev_mod.o psb_c_cuda_vect_mod.o psb_cuda_env_mod.o +psb_c_cuda_elg_mat_mod.o: elldev_mod.o psb_c_cuda_vect_mod.o +psb_c_cuda_csrg_mat_mod.o psb_c_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_csrg_mat_mod.o psb_z_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o vectordev.o: cuda_util.o vectordev.h @@ -101,22 +101,22 @@ svectordev.o: svectordev.h vectordev.h dvectordev.o: dvectordev.h vectordev.h cvectordev.o: cvectordev.h vectordev.h zvectordev.o: zvectordev.h vectordev.h -psb_gpu_env_mod.o: base_cusparse_mod.o -psb_gpu_mod.o: psb_gpu_env_mod.o psb_i_gpu_vect_mod.o\ - psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ - psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ - psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ - psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ - psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ - psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ - psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ - psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ - psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ - psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ - psb_d_diag_mat_mod.o \ - psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ - psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ - psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o +psb_cuda_env_mod.o: base_cusparse_mod.o +psb_cuda_mod.o: psb_cuda_env_mod.o psb_i_cuda_vect_mod.o\ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_d_cuda_diag_mat_mod.o \ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o iobjs: $(FOBJS) $(MAKE) -C impl objs diff --git a/cuda/impl/Makefile b/cuda/impl/Makefile index 6ddac9a7..12bf0747 100755 --- a/cuda/impl/Makefile +++ b/cuda/impl/Makefile @@ -17,276 +17,276 @@ CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) OBJS= \ -psb_d_cp_csrg_from_coo.o \ -psb_d_cp_csrg_from_fmt.o \ -psb_d_cp_elg_from_coo.o \ -psb_d_cp_elg_from_fmt.o \ -psb_s_cp_csrg_from_coo.o \ -psb_s_cp_csrg_from_fmt.o \ -psb_s_csrg_allocate_mnnz.o \ -psb_s_csrg_csmm.o \ -psb_s_csrg_csmv.o \ -psb_s_csrg_mold.o \ -psb_s_csrg_reallocate_nz.o \ -psb_s_csrg_scal.o \ -psb_s_csrg_scals.o \ -psb_s_csrg_from_gpu.o \ -psb_s_csrg_to_gpu.o \ -psb_s_csrg_vect_mv.o \ -psb_s_csrg_inner_vect_sv.o \ -psb_d_csrg_allocate_mnnz.o \ -psb_d_csrg_csmm.o \ -psb_d_csrg_csmv.o \ -psb_d_csrg_mold.o \ -psb_d_csrg_reallocate_nz.o \ -psb_d_csrg_scal.o \ -psb_d_csrg_scals.o \ -psb_d_csrg_from_gpu.o \ -psb_d_csrg_to_gpu.o \ -psb_d_csrg_vect_mv.o \ -psb_d_csrg_inner_vect_sv.o \ -psb_d_elg_allocate_mnnz.o \ -psb_d_elg_asb.o \ -psb_d_elg_csmm.o \ -psb_d_elg_csmv.o \ -psb_d_elg_csput.o \ -psb_d_elg_from_gpu.o \ -psb_d_elg_inner_vect_sv.o \ -psb_d_elg_mold.o \ -psb_d_elg_reallocate_nz.o \ -psb_d_elg_scal.o \ -psb_d_elg_scals.o \ -psb_d_elg_to_gpu.o \ -psb_d_elg_vect_mv.o \ -psb_d_mv_csrg_from_coo.o \ -psb_d_mv_csrg_from_fmt.o \ -psb_d_mv_elg_from_coo.o \ -psb_d_mv_elg_from_fmt.o \ -psb_s_mv_csrg_from_coo.o \ -psb_s_mv_csrg_from_fmt.o \ -psb_s_cp_elg_from_coo.o \ -psb_s_cp_elg_from_fmt.o \ -psb_s_elg_allocate_mnnz.o \ -psb_s_elg_asb.o \ -psb_s_elg_csmm.o \ -psb_s_elg_csmv.o \ -psb_s_elg_csput.o \ -psb_s_elg_inner_vect_sv.o \ -psb_s_elg_mold.o \ -psb_s_elg_reallocate_nz.o \ -psb_s_elg_scal.o \ -psb_s_elg_scals.o \ -psb_s_elg_to_gpu.o \ -psb_s_elg_from_gpu.o \ -psb_s_elg_vect_mv.o \ -psb_s_mv_elg_from_coo.o \ -psb_s_mv_elg_from_fmt.o \ -psb_s_cp_hlg_from_fmt.o \ -psb_s_cp_hlg_from_coo.o \ -psb_d_cp_hlg_from_fmt.o \ -psb_d_cp_hlg_from_coo.o \ -psb_d_hlg_allocate_mnnz.o \ -psb_d_hlg_csmm.o \ -psb_d_hlg_csmv.o \ -psb_d_hlg_inner_vect_sv.o \ -psb_d_hlg_mold.o \ -psb_d_hlg_reallocate_nz.o \ -psb_d_hlg_scal.o \ -psb_d_hlg_scals.o \ -psb_d_hlg_from_gpu.o \ -psb_d_hlg_to_gpu.o \ -psb_d_hlg_vect_mv.o \ -psb_s_hlg_allocate_mnnz.o \ -psb_s_hlg_csmm.o \ -psb_s_hlg_csmv.o \ -psb_s_hlg_inner_vect_sv.o \ -psb_s_hlg_mold.o \ -psb_s_hlg_reallocate_nz.o \ -psb_s_hlg_scal.o \ -psb_s_hlg_scals.o \ -psb_s_hlg_from_gpu.o \ -psb_s_hlg_to_gpu.o \ -psb_s_hlg_vect_mv.o \ -psb_s_mv_hlg_from_coo.o \ -psb_s_cp_hlg_from_coo.o \ -psb_s_mv_hlg_from_fmt.o \ -psb_d_mv_hlg_from_coo.o \ -psb_d_cp_hlg_from_coo.o \ -psb_d_mv_hlg_from_fmt.o \ -psb_s_hybg_allocate_mnnz.o \ -psb_s_hybg_csmm.o \ -psb_s_hybg_csmv.o \ -psb_s_hybg_reallocate_nz.o \ -psb_s_hybg_scal.o \ -psb_s_hybg_scals.o \ -psb_s_hybg_to_gpu.o \ -psb_s_hybg_vect_mv.o \ -psb_s_hybg_inner_vect_sv.o \ -psb_s_cp_hybg_from_coo.o \ -psb_s_cp_hybg_from_fmt.o \ -psb_s_mv_hybg_from_fmt.o \ -psb_s_mv_hybg_from_coo.o \ -psb_s_hybg_mold.o \ -psb_d_hybg_allocate_mnnz.o \ -psb_d_hybg_csmm.o \ -psb_d_hybg_csmv.o \ -psb_d_hybg_reallocate_nz.o \ -psb_d_hybg_scal.o \ -psb_d_hybg_scals.o \ -psb_d_hybg_to_gpu.o \ -psb_d_hybg_vect_mv.o \ -psb_d_hybg_inner_vect_sv.o \ -psb_d_cp_hybg_from_coo.o \ -psb_d_cp_hybg_from_fmt.o \ -psb_d_mv_hybg_from_fmt.o \ -psb_d_mv_hybg_from_coo.o \ -psb_d_hybg_mold.o \ -psb_z_cp_csrg_from_coo.o \ -psb_z_cp_csrg_from_fmt.o \ -psb_z_cp_elg_from_coo.o \ -psb_z_cp_elg_from_fmt.o \ -psb_c_cp_csrg_from_coo.o \ -psb_c_cp_csrg_from_fmt.o \ -psb_c_csrg_allocate_mnnz.o \ -psb_c_csrg_csmm.o \ -psb_c_csrg_csmv.o \ -psb_c_csrg_mold.o \ -psb_c_csrg_reallocate_nz.o \ -psb_c_csrg_scal.o \ -psb_c_csrg_scals.o \ -psb_c_csrg_from_gpu.o \ -psb_c_csrg_to_gpu.o \ -psb_c_csrg_vect_mv.o \ -psb_c_csrg_inner_vect_sv.o \ -psb_z_csrg_allocate_mnnz.o \ -psb_z_csrg_csmm.o \ -psb_z_csrg_csmv.o \ -psb_z_csrg_mold.o \ -psb_z_csrg_reallocate_nz.o \ -psb_z_csrg_scal.o \ -psb_z_csrg_scals.o \ -psb_z_csrg_from_gpu.o \ -psb_z_csrg_to_gpu.o \ -psb_z_csrg_vect_mv.o \ -psb_z_csrg_inner_vect_sv.o \ -psb_z_elg_allocate_mnnz.o \ -psb_z_elg_asb.o \ -psb_z_elg_csmm.o \ -psb_z_elg_csmv.o \ -psb_z_elg_csput.o \ -psb_z_elg_inner_vect_sv.o \ -psb_z_elg_mold.o \ -psb_z_elg_reallocate_nz.o \ -psb_z_elg_scal.o \ -psb_z_elg_scals.o \ -psb_z_elg_to_gpu.o \ -psb_z_elg_from_gpu.o \ -psb_z_elg_vect_mv.o \ -psb_z_mv_csrg_from_coo.o \ -psb_z_mv_csrg_from_fmt.o \ -psb_z_mv_elg_from_coo.o \ -psb_z_mv_elg_from_fmt.o \ -psb_c_mv_csrg_from_coo.o \ -psb_c_mv_csrg_from_fmt.o \ -psb_c_cp_elg_from_coo.o \ -psb_c_cp_elg_from_fmt.o \ -psb_c_elg_allocate_mnnz.o \ -psb_c_elg_asb.o \ -psb_c_elg_csmm.o \ -psb_c_elg_csmv.o \ -psb_c_elg_csput.o \ -psb_c_elg_inner_vect_sv.o \ -psb_c_elg_mold.o \ -psb_c_elg_reallocate_nz.o \ -psb_c_elg_scal.o \ -psb_c_elg_scals.o \ -psb_c_elg_to_gpu.o \ -psb_c_elg_from_gpu.o \ -psb_c_elg_vect_mv.o \ -psb_c_mv_elg_from_coo.o \ -psb_c_mv_elg_from_fmt.o \ -psb_c_cp_hlg_from_fmt.o \ -psb_c_cp_hlg_from_coo.o \ -psb_z_cp_hlg_from_fmt.o \ -psb_z_cp_hlg_from_coo.o \ -psb_z_hlg_allocate_mnnz.o \ -psb_z_hlg_csmm.o \ -psb_z_hlg_csmv.o \ -psb_z_hlg_inner_vect_sv.o \ -psb_z_hlg_mold.o \ -psb_z_hlg_reallocate_nz.o \ -psb_z_hlg_scal.o \ -psb_z_hlg_scals.o \ -psb_z_hlg_from_gpu.o \ -psb_z_hlg_to_gpu.o \ -psb_z_hlg_vect_mv.o \ -psb_c_hlg_allocate_mnnz.o \ -psb_c_hlg_csmm.o \ -psb_c_hlg_csmv.o \ -psb_c_hlg_inner_vect_sv.o \ -psb_c_hlg_mold.o \ -psb_c_hlg_reallocate_nz.o \ -psb_c_hlg_scal.o \ -psb_c_hlg_scals.o \ -psb_c_hlg_from_gpu.o \ -psb_c_hlg_to_gpu.o \ -psb_c_hlg_vect_mv.o \ -psb_c_mv_hlg_from_coo.o \ -psb_c_cp_hlg_from_coo.o \ -psb_c_mv_hlg_from_fmt.o \ -psb_z_mv_hlg_from_coo.o \ -psb_z_cp_hlg_from_coo.o \ -psb_z_mv_hlg_from_fmt.o \ -psb_c_hybg_allocate_mnnz.o \ -psb_c_hybg_csmm.o \ -psb_c_hybg_csmv.o \ -psb_c_hybg_reallocate_nz.o \ -psb_c_hybg_scal.o \ -psb_c_hybg_scals.o \ -psb_c_hybg_to_gpu.o \ -psb_c_hybg_vect_mv.o \ -psb_c_hybg_inner_vect_sv.o \ -psb_c_cp_hybg_from_coo.o \ -psb_c_cp_hybg_from_fmt.o \ -psb_c_mv_hybg_from_fmt.o \ -psb_c_mv_hybg_from_coo.o \ -psb_c_hybg_mold.o \ -psb_z_hybg_allocate_mnnz.o \ -psb_z_hybg_csmm.o \ -psb_z_hybg_csmv.o \ -psb_z_hybg_reallocate_nz.o \ -psb_z_hybg_scal.o \ -psb_z_hybg_scals.o \ -psb_z_hybg_to_gpu.o \ -psb_z_hybg_vect_mv.o \ -psb_z_hybg_inner_vect_sv.o \ -psb_z_cp_hybg_from_coo.o \ -psb_z_cp_hybg_from_fmt.o \ -psb_z_mv_hybg_from_fmt.o \ -psb_z_mv_hybg_from_coo.o \ -psb_z_hybg_mold.o \ -psb_d_cp_diag_from_coo.o \ -psb_d_mv_diag_from_coo.o \ -psb_d_diag_to_gpu.o \ -psb_d_diag_csmv.o \ -psb_d_diag_mold.o \ -psb_d_diag_vect_mv.o \ -psb_d_cp_hdiag_from_coo.o \ -psb_d_mv_hdiag_from_coo.o \ -psb_d_hdiag_to_gpu.o \ -psb_d_hdiag_csmv.o \ -psb_d_hdiag_mold.o \ -psb_d_hdiag_vect_mv.o \ -psb_s_cp_hdiag_from_coo.o \ -psb_s_mv_hdiag_from_coo.o \ -psb_s_hdiag_to_gpu.o \ -psb_s_hdiag_csmv.o \ -psb_s_hdiag_mold.o \ -psb_s_hdiag_vect_mv.o \ -psb_s_dnsg_mat_impl.o \ -psb_d_dnsg_mat_impl.o \ -psb_c_dnsg_mat_impl.o \ -psb_z_dnsg_mat_impl.o +psb_d_cuda_cp_csrg_from_coo.o \ +psb_d_cuda_cp_csrg_from_fmt.o \ +psb_d_cuda_cp_elg_from_coo.o \ +psb_d_cuda_cp_elg_from_fmt.o \ +psb_s_cuda_cp_csrg_from_coo.o \ +psb_s_cuda_cp_csrg_from_fmt.o \ +psb_s_cuda_csrg_allocate_mnnz.o \ +psb_s_cuda_csrg_csmm.o \ +psb_s_cuda_csrg_csmv.o \ +psb_s_cuda_csrg_mold.o \ +psb_s_cuda_csrg_reallocate_nz.o \ +psb_s_cuda_csrg_scal.o \ +psb_s_cuda_csrg_scals.o \ +psb_s_cuda_csrg_from_gpu.o \ +psb_s_cuda_csrg_to_gpu.o \ +psb_s_cuda_csrg_vect_mv.o \ +psb_s_cuda_csrg_inner_vect_sv.o \ +psb_d_cuda_csrg_allocate_mnnz.o \ +psb_d_cuda_csrg_csmm.o \ +psb_d_cuda_csrg_csmv.o \ +psb_d_cuda_csrg_mold.o \ +psb_d_cuda_csrg_reallocate_nz.o \ +psb_d_cuda_csrg_scal.o \ +psb_d_cuda_csrg_scals.o \ +psb_d_cuda_csrg_from_gpu.o \ +psb_d_cuda_csrg_to_gpu.o \ +psb_d_cuda_csrg_vect_mv.o \ +psb_d_cuda_csrg_inner_vect_sv.o \ +psb_d_cuda_elg_allocate_mnnz.o \ +psb_d_cuda_elg_asb.o \ +psb_d_cuda_elg_csmm.o \ +psb_d_cuda_elg_csmv.o \ +psb_d_cuda_elg_csput.o \ +psb_d_cuda_elg_from_gpu.o \ +psb_d_cuda_elg_inner_vect_sv.o \ +psb_d_cuda_elg_mold.o \ +psb_d_cuda_elg_reallocate_nz.o \ +psb_d_cuda_elg_scal.o \ +psb_d_cuda_elg_scals.o \ +psb_d_cuda_elg_to_gpu.o \ +psb_d_cuda_elg_vect_mv.o \ +psb_d_cuda_mv_csrg_from_coo.o \ +psb_d_cuda_mv_csrg_from_fmt.o \ +psb_d_cuda_mv_elg_from_coo.o \ +psb_d_cuda_mv_elg_from_fmt.o \ +psb_s_cuda_mv_csrg_from_coo.o \ +psb_s_cuda_mv_csrg_from_fmt.o \ +psb_s_cuda_cp_elg_from_coo.o \ +psb_s_cuda_cp_elg_from_fmt.o \ +psb_s_cuda_elg_allocate_mnnz.o \ +psb_s_cuda_elg_asb.o \ +psb_s_cuda_elg_csmm.o \ +psb_s_cuda_elg_csmv.o \ +psb_s_cuda_elg_csput.o \ +psb_s_cuda_elg_inner_vect_sv.o \ +psb_s_cuda_elg_mold.o \ +psb_s_cuda_elg_reallocate_nz.o \ +psb_s_cuda_elg_scal.o \ +psb_s_cuda_elg_scals.o \ +psb_s_cuda_elg_to_gpu.o \ +psb_s_cuda_elg_from_gpu.o \ +psb_s_cuda_elg_vect_mv.o \ +psb_s_cuda_mv_elg_from_coo.o \ +psb_s_cuda_mv_elg_from_fmt.o \ +psb_s_cuda_cp_hlg_from_fmt.o \ +psb_s_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_cp_hlg_from_fmt.o \ +psb_d_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_hlg_allocate_mnnz.o \ +psb_d_cuda_hlg_csmm.o \ +psb_d_cuda_hlg_csmv.o \ +psb_d_cuda_hlg_inner_vect_sv.o \ +psb_d_cuda_hlg_mold.o \ +psb_d_cuda_hlg_reallocate_nz.o \ +psb_d_cuda_hlg_scal.o \ +psb_d_cuda_hlg_scals.o \ +psb_d_cuda_hlg_from_gpu.o \ +psb_d_cuda_hlg_to_gpu.o \ +psb_d_cuda_hlg_vect_mv.o \ +psb_s_cuda_hlg_allocate_mnnz.o \ +psb_s_cuda_hlg_csmm.o \ +psb_s_cuda_hlg_csmv.o \ +psb_s_cuda_hlg_inner_vect_sv.o \ +psb_s_cuda_hlg_mold.o \ +psb_s_cuda_hlg_reallocate_nz.o \ +psb_s_cuda_hlg_scal.o \ +psb_s_cuda_hlg_scals.o \ +psb_s_cuda_hlg_from_gpu.o \ +psb_s_cuda_hlg_to_gpu.o \ +psb_s_cuda_hlg_vect_mv.o \ +psb_s_cuda_mv_hlg_from_coo.o \ +psb_s_cuda_cp_hlg_from_coo.o \ +psb_s_cuda_mv_hlg_from_fmt.o \ +psb_d_cuda_mv_hlg_from_coo.o \ +psb_d_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_mv_hlg_from_fmt.o \ +psb_s_cuda_hybg_allocate_mnnz.o \ +psb_s_cuda_hybg_csmm.o \ +psb_s_cuda_hybg_csmv.o \ +psb_s_cuda_hybg_reallocate_nz.o \ +psb_s_cuda_hybg_scal.o \ +psb_s_cuda_hybg_scals.o \ +psb_s_cuda_hybg_to_gpu.o \ +psb_s_cuda_hybg_vect_mv.o \ +psb_s_cuda_hybg_inner_vect_sv.o \ +psb_s_cuda_cp_hybg_from_coo.o \ +psb_s_cuda_cp_hybg_from_fmt.o \ +psb_s_cuda_mv_hybg_from_fmt.o \ +psb_s_cuda_mv_hybg_from_coo.o \ +psb_s_cuda_hybg_mold.o \ +psb_d_cuda_hybg_allocate_mnnz.o \ +psb_d_cuda_hybg_csmm.o \ +psb_d_cuda_hybg_csmv.o \ +psb_d_cuda_hybg_reallocate_nz.o \ +psb_d_cuda_hybg_scal.o \ +psb_d_cuda_hybg_scals.o \ +psb_d_cuda_hybg_to_gpu.o \ +psb_d_cuda_hybg_vect_mv.o \ +psb_d_cuda_hybg_inner_vect_sv.o \ +psb_d_cuda_cp_hybg_from_coo.o \ +psb_d_cuda_cp_hybg_from_fmt.o \ +psb_d_cuda_mv_hybg_from_fmt.o \ +psb_d_cuda_mv_hybg_from_coo.o \ +psb_d_cuda_hybg_mold.o \ +psb_z_cuda_cp_csrg_from_coo.o \ +psb_z_cuda_cp_csrg_from_fmt.o \ +psb_z_cuda_cp_elg_from_coo.o \ +psb_z_cuda_cp_elg_from_fmt.o \ +psb_c_cuda_cp_csrg_from_coo.o \ +psb_c_cuda_cp_csrg_from_fmt.o \ +psb_c_cuda_csrg_allocate_mnnz.o \ +psb_c_cuda_csrg_csmm.o \ +psb_c_cuda_csrg_csmv.o \ +psb_c_cuda_csrg_mold.o \ +psb_c_cuda_csrg_reallocate_nz.o \ +psb_c_cuda_csrg_scal.o \ +psb_c_cuda_csrg_scals.o \ +psb_c_cuda_csrg_from_gpu.o \ +psb_c_cuda_csrg_to_gpu.o \ +psb_c_cuda_csrg_vect_mv.o \ +psb_c_cuda_csrg_inner_vect_sv.o \ +psb_z_cuda_csrg_allocate_mnnz.o \ +psb_z_cuda_csrg_csmm.o \ +psb_z_cuda_csrg_csmv.o \ +psb_z_cuda_csrg_mold.o \ +psb_z_cuda_csrg_reallocate_nz.o \ +psb_z_cuda_csrg_scal.o \ +psb_z_cuda_csrg_scals.o \ +psb_z_cuda_csrg_from_gpu.o \ +psb_z_cuda_csrg_to_gpu.o \ +psb_z_cuda_csrg_vect_mv.o \ +psb_z_cuda_csrg_inner_vect_sv.o \ +psb_z_cuda_elg_allocate_mnnz.o \ +psb_z_cuda_elg_asb.o \ +psb_z_cuda_elg_csmm.o \ +psb_z_cuda_elg_csmv.o \ +psb_z_cuda_elg_csput.o \ +psb_z_cuda_elg_inner_vect_sv.o \ +psb_z_cuda_elg_mold.o \ +psb_z_cuda_elg_reallocate_nz.o \ +psb_z_cuda_elg_scal.o \ +psb_z_cuda_elg_scals.o \ +psb_z_cuda_elg_to_gpu.o \ +psb_z_cuda_elg_from_gpu.o \ +psb_z_cuda_elg_vect_mv.o \ +psb_z_cuda_mv_csrg_from_coo.o \ +psb_z_cuda_mv_csrg_from_fmt.o \ +psb_z_cuda_mv_elg_from_coo.o \ +psb_z_cuda_mv_elg_from_fmt.o \ +psb_c_cuda_mv_csrg_from_coo.o \ +psb_c_cuda_mv_csrg_from_fmt.o \ +psb_c_cuda_cp_elg_from_coo.o \ +psb_c_cuda_cp_elg_from_fmt.o \ +psb_c_cuda_elg_allocate_mnnz.o \ +psb_c_cuda_elg_asb.o \ +psb_c_cuda_elg_csmm.o \ +psb_c_cuda_elg_csmv.o \ +psb_c_cuda_elg_csput.o \ +psb_c_cuda_elg_inner_vect_sv.o \ +psb_c_cuda_elg_mold.o \ +psb_c_cuda_elg_reallocate_nz.o \ +psb_c_cuda_elg_scal.o \ +psb_c_cuda_elg_scals.o \ +psb_c_cuda_elg_to_gpu.o \ +psb_c_cuda_elg_from_gpu.o \ +psb_c_cuda_elg_vect_mv.o \ +psb_c_cuda_mv_elg_from_coo.o \ +psb_c_cuda_mv_elg_from_fmt.o \ +psb_c_cuda_cp_hlg_from_fmt.o \ +psb_c_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_cp_hlg_from_fmt.o \ +psb_z_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_hlg_allocate_mnnz.o \ +psb_z_cuda_hlg_csmm.o \ +psb_z_cuda_hlg_csmv.o \ +psb_z_cuda_hlg_inner_vect_sv.o \ +psb_z_cuda_hlg_mold.o \ +psb_z_cuda_hlg_reallocate_nz.o \ +psb_z_cuda_hlg_scal.o \ +psb_z_cuda_hlg_scals.o \ +psb_z_cuda_hlg_from_gpu.o \ +psb_z_cuda_hlg_to_gpu.o \ +psb_z_cuda_hlg_vect_mv.o \ +psb_c_cuda_hlg_allocate_mnnz.o \ +psb_c_cuda_hlg_csmm.o \ +psb_c_cuda_hlg_csmv.o \ +psb_c_cuda_hlg_inner_vect_sv.o \ +psb_c_cuda_hlg_mold.o \ +psb_c_cuda_hlg_reallocate_nz.o \ +psb_c_cuda_hlg_scal.o \ +psb_c_cuda_hlg_scals.o \ +psb_c_cuda_hlg_from_gpu.o \ +psb_c_cuda_hlg_to_gpu.o \ +psb_c_cuda_hlg_vect_mv.o \ +psb_c_cuda_mv_hlg_from_coo.o \ +psb_c_cuda_cp_hlg_from_coo.o \ +psb_c_cuda_mv_hlg_from_fmt.o \ +psb_z_cuda_mv_hlg_from_coo.o \ +psb_z_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_mv_hlg_from_fmt.o \ +psb_c_cuda_hybg_allocate_mnnz.o \ +psb_c_cuda_hybg_csmm.o \ +psb_c_cuda_hybg_csmv.o \ +psb_c_cuda_hybg_reallocate_nz.o \ +psb_c_cuda_hybg_scal.o \ +psb_c_cuda_hybg_scals.o \ +psb_c_cuda_hybg_to_gpu.o \ +psb_c_cuda_hybg_vect_mv.o \ +psb_c_cuda_hybg_inner_vect_sv.o \ +psb_c_cuda_cp_hybg_from_coo.o \ +psb_c_cuda_cp_hybg_from_fmt.o \ +psb_c_cuda_mv_hybg_from_fmt.o \ +psb_c_cuda_mv_hybg_from_coo.o \ +psb_c_cuda_hybg_mold.o \ +psb_z_cuda_hybg_allocate_mnnz.o \ +psb_z_cuda_hybg_csmm.o \ +psb_z_cuda_hybg_csmv.o \ +psb_z_cuda_hybg_reallocate_nz.o \ +psb_z_cuda_hybg_scal.o \ +psb_z_cuda_hybg_scals.o \ +psb_z_cuda_hybg_to_gpu.o \ +psb_z_cuda_hybg_vect_mv.o \ +psb_z_cuda_hybg_inner_vect_sv.o \ +psb_z_cuda_cp_hybg_from_coo.o \ +psb_z_cuda_cp_hybg_from_fmt.o \ +psb_z_cuda_mv_hybg_from_fmt.o \ +psb_z_cuda_mv_hybg_from_coo.o \ +psb_z_cuda_hybg_mold.o \ +psb_d_cuda_cp_diag_from_coo.o \ +psb_d_cuda_mv_diag_from_coo.o \ +psb_d_cuda_diag_to_gpu.o \ +psb_d_cuda_diag_csmv.o \ +psb_d_cuda_diag_mold.o \ +psb_d_cuda_diag_vect_mv.o \ +psb_d_cuda_cp_hdiag_from_coo.o \ +psb_d_cuda_mv_hdiag_from_coo.o \ +psb_d_cuda_hdiag_to_gpu.o \ +psb_d_cuda_hdiag_csmv.o \ +psb_d_cuda_hdiag_mold.o \ +psb_d_cuda_hdiag_vect_mv.o \ +psb_s_cuda_cp_hdiag_from_coo.o \ +psb_s_cuda_mv_hdiag_from_coo.o \ +psb_s_cuda_hdiag_to_gpu.o \ +psb_s_cuda_hdiag_csmv.o \ +psb_s_cuda_hdiag_mold.o \ +psb_s_cuda_hdiag_vect_mv.o \ +psb_s_cuda_dnsg_mat_impl.o \ +psb_d_cuda_dnsg_mat_impl.o \ +psb_c_cuda_dnsg_mat_impl.o \ +psb_z_cuda_dnsg_mat_impl.o objs: $(OBJS) diff --git a/cuda/impl/psb_c_cp_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_cp_csrg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 index 9ab3b7f0..af3301ff 100644 --- a/cuda/impl/psb_c_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_cp_csrg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_coo + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_c_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_csrg_from_coo +end subroutine psb_c_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_c_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 index 5229244f..47845e52 100644 --- a/cuda/impl/psb_c_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_cp_csrg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_fmt + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_c_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_c_cp_csrg_from_fmt +end subroutine psb_c_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_c_cp_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_cp_diag_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 index 8d196891..5b1eb817 100644 --- a/cuda/impl/psb_c_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_cp_diag_from_coo(a,b,info) +subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_cp_diag_from_coo + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif implicit none - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_c_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_diag_from_coo +end subroutine psb_c_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_c_cp_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_c_cp_elg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 index 95193c13..fedffa22 100644 --- a/cuda/impl/psb_c_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_c_cp_elg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_coo + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_c_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_c_count_ell_from_coo -end subroutine psb_c_cp_elg_from_coo +end subroutine psb_c_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_c_cp_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_c_cp_elg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 index e8be8a8d..4c44d29a 100644 --- a/cuda/impl/psb_c_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_cp_elg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_fmt + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_c_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_c_cp_elg_from_fmt +end subroutine psb_c_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_c_cp_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_c_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 index f0ec00ad..436eabaa 100644 --- a/cuda/impl/psb_c_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_cp_hdiag_from_coo(a,b,info) +subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif implicit none - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_c_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_c_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_hdiag_from_coo +end subroutine psb_c_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_c_cp_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_c_cp_hlg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 index cf305592..d30fccbf 100644 --- a/cuda/impl/psb_c_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_cp_hlg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_c_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_c_cp_hlg_from_coo +end subroutine psb_c_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_c_cp_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_c_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 index 559c501c..259364cd 100644 --- a/cuda/impl/psb_c_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_cp_hlg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_fmt + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_c_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_hlg_from_fmt +end subroutine psb_c_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_c_cp_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_cp_hybg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 index 00a7d4ee..7ebb5240 100644 --- a/cuda/impl/psb_c_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_cp_hybg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_coo + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_c_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_hybg_from_coo +end subroutine psb_c_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_c_cp_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 index 643abf99..033ba966 100644 --- a/cuda/impl/psb_c_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_cp_hybg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_fmt + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_c_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_c_cp_hybg_from_fmt +end subroutine psb_c_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_c_csrg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_c_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 index 2183ee63..d9736d23 100644 --- a/cuda/impl/psb_c_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_allocate_mnnz + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_csrg_allocate_mnnz +end subroutine psb_c_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_c_csrg_csmm.F90 b/cuda/impl/psb_c_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_c_csrg_csmm.F90 rename to cuda/impl/psb_c_cuda_csrg_csmm.F90 index cef5d288..8f2f55b9 100644 --- a/cuda/impl/psb_c_csrg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_csmm + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_csrg_csmm +end subroutine psb_c_cuda_csrg_csmm diff --git a/cuda/impl/psb_c_csrg_csmv.F90 b/cuda/impl/psb_c_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_c_csrg_csmv.F90 rename to cuda/impl/psb_c_cuda_csrg_csmv.F90 index 3a543da3..ba681401 100644 --- a/cuda/impl/psb_c_csrg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_csmv + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmv #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_csrg_csmv' + character(len=20) :: name='c_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_csrg_csmv +end subroutine psb_c_cuda_csrg_csmv diff --git a/cuda/impl/psb_c_csrg_from_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_csrg_from_gpu.F90 rename to cuda/impl/psb_c_cuda_csrg_from_gpu.F90 index 606606bb..b1bed7e5 100644 --- a/cuda/impl/psb_c_csrg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_csrg_from_gpu(a,info) +subroutine psb_c_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_from_gpu + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_from_gpu #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_c_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_c_csrg_from_gpu +end subroutine psb_c_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_c_csrg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 index 39938752..32dec5ef 100644 --- a/cuda/impl/psb_c_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_inner_vect_sv + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='c_csrg_inner_vect_sv' + character(len=20) :: name='c_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_csrg_inner_vect_sv +end subroutine psb_c_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_c_csrg_mold.F90 b/cuda/impl/psb_c_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_c_csrg_mold.F90 rename to cuda/impl/psb_c_cuda_csrg_mold.F90 index 8b1b616a..405f2736 100644 --- a/cuda/impl/psb_c_csrg_mold.F90 +++ b/cuda/impl/psb_c_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_csrg_mold(a,b,info) +subroutine psb_c_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_mold + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_mold implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_csrg_mold(a,b,info) return -end subroutine psb_c_csrg_mold +end subroutine psb_c_cuda_csrg_mold diff --git a/cuda/impl/psb_c_csrg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_c_csrg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 index e9db4128..22f9f118 100644 --- a/cuda/impl/psb_c_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_c_csrg_reallocate_nz(nz,a) +subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_reallocate_nz + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_csrg_reallocate_nz' + character(len=20) :: name='c_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_c_csrg_reallocate_nz(nz,a) return -end subroutine psb_c_csrg_reallocate_nz +end subroutine psb_c_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_c_csrg_scal.F90 b/cuda/impl/psb_c_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_scal.F90 rename to cuda/impl/psb_c_cuda_csrg_scal.F90 index f183a822..556a0ec5 100644 --- a/cuda/impl/psb_c_csrg_scal.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_c_csrg_scal(d,a,info,side) +subroutine psb_c_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scal + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_c_csrg_scal(d,a,info,side) return -end subroutine psb_c_csrg_scal +end subroutine psb_c_cuda_csrg_scal diff --git a/cuda/impl/psb_c_csrg_scals.F90 b/cuda/impl/psb_c_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_scals.F90 rename to cuda/impl/psb_c_cuda_csrg_scals.F90 index 13f0d707..a67e91cd 100644 --- a/cuda/impl/psb_c_csrg_scals.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_c_csrg_scals(d,a,info) +subroutine psb_c_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scals + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_c_csrg_scals(d,a,info) return -end subroutine psb_c_csrg_scals +end subroutine psb_c_cuda_csrg_scals diff --git a/cuda/impl/psb_c_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_c_csrg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index a04f1bab..ea710cbc 100644 --- a/cuda/impl/psb_c_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_c_csrg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_to_gpu + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_c_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_c_csrg_to_gpu +end subroutine psb_c_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_c_csrg_vect_mv.F90 b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_csrg_vect_mv.F90 index 0feb03fd..cb556d20 100644 --- a/cuda/impl/psb_c_csrg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_vect_mv + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_csrg_vect_mv' + character(len=20) :: name='c_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= czero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_csrg_vect_mv +end subroutine psb_c_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_c_diag_csmv.F90 b/cuda/impl/psb_c_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_c_diag_csmv.F90 rename to cuda/impl/psb_c_cuda_diag_csmv.F90 index 05ca102f..00ab742d 100644 --- a/cuda/impl/psb_c_diag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_csmv + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='c_diag_csmv' + character(len=20) :: name='c_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_diag_csmv +end subroutine psb_c_cuda_diag_csmv diff --git a/cuda/impl/psb_c_diag_mold.F90 b/cuda/impl/psb_c_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_c_diag_mold.F90 rename to cuda/impl/psb_c_cuda_diag_mold.F90 index 8d79e78d..4c8a3c56 100644 --- a/cuda/impl/psb_c_diag_mold.F90 +++ b/cuda/impl/psb_c_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_diag_mold(a,b,info) +subroutine psb_c_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_mold + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_mold implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_diag_mold(a,b,info) return -end subroutine psb_c_diag_mold +end subroutine psb_c_cuda_diag_mold diff --git a/cuda/impl/psb_c_diag_to_gpu.F90 b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_diag_to_gpu.F90 rename to cuda/impl/psb_c_cuda_diag_to_gpu.F90 index a60fc741..4f2c21d9 100644 --- a/cuda/impl/psb_c_diag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_diag_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_to_gpu + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_c_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_c_diag_to_gpu +end subroutine psb_c_cuda_diag_to_gpu diff --git a/cuda/impl/psb_c_diag_vect_mv.F90 b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_c_diag_vect_mv.F90 rename to cuda/impl/psb_c_cuda_diag_vect_mv.F90 index e680a737..02bb9587 100644 --- a/cuda/impl/psb_c_diag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_vect_mv + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_diag_vect_mv' + character(len=20) :: name='c_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_diag_vect_mv +end subroutine psb_c_cuda_diag_vect_mv diff --git a/cuda/impl/psb_c_dnsg_mat_impl.F90 b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_c_dnsg_mat_impl.F90 rename to cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 index b70f383a..bb2ec97b 100644 --- a/cuda/impl/psb_c_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_vect_mv + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ complex(psb_spk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='c_dnsg_vect_mv' + character(len=20) :: name='c_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= czero) then @@ -117,21 +117,21 @@ subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_dnsg_vect_mv +end subroutine psb_c_cuda_dnsg_vect_mv -subroutine psb_c_dnsg_mold(a,b,info) +subroutine psb_c_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_mold + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_c_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_c_dnsg_mold(a,b,info) return -end subroutine psb_c_dnsg_mold +end subroutine psb_c_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_c_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_c_dnsg_to_gpu(a,info) +subroutine psb_c_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_to_gpu + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='c_dnsg_to_gpu' + character(len=20) :: name='c_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_c_dnsg_to_gpu(a,info) return -end subroutine psb_c_dnsg_to_gpu +end subroutine psb_c_cuda_dnsg_to_gpu -subroutine psb_c_cp_dnsg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_coo + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_dnsg_cp_from_coo' + character(len=20) :: name='c_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_c_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_c_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_c_cp_dnsg_from_coo +end subroutine psb_c_cuda_cp_dnsg_from_coo -subroutine psb_c_cp_dnsg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_fmt + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_c_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_dnsg_cp_from_fmt' + character(len=20) :: name='c_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_c_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_c_cp_dnsg_from_fmt +end subroutine psb_c_cuda_cp_dnsg_from_fmt -subroutine psb_c_mv_dnsg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_coo + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='c_dnsg_mv_from_coo' + character(len=20) :: name='c_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_c_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_c_mv_dnsg_from_coo +end subroutine psb_c_cuda_mv_dnsg_from_coo -subroutine psb_c_mv_dnsg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_fmt + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_c_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_dnsg_cp_from_fmt' + character(len=20) :: name='c_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_c_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_c_mv_dnsg_from_fmt +end subroutine psb_c_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_c_elg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_c_elg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 index ac9e654f..01ca7189 100644 --- a/cuda/impl/psb_c_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_allocate_mnnz + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_elg_allocate_mnnz +end subroutine psb_c_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_d_elg_asb.f90 b/cuda/impl/psb_c_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_d_elg_asb.f90 rename to cuda/impl/psb_c_cuda_elg_asb.f90 index f80537ef..24af1cc9 100644 --- a/cuda/impl/psb_d_elg_asb.f90 +++ b/cuda/impl/psb_c_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_d_elg_asb(a) +subroutine psb_c_cuda_elg_asb(a) use psb_base_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_asb + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_asb implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_d_elg_asb(a) return -end subroutine psb_d_elg_asb +end subroutine psb_c_cuda_elg_asb diff --git a/cuda/impl/psb_c_elg_csmm.F90 b/cuda/impl/psb_c_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_c_elg_csmm.F90 rename to cuda/impl/psb_c_cuda_elg_csmm.F90 index 5d355d88..a5f0e3d5 100644 --- a/cuda/impl/psb_c_elg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csmm + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_elg_csmm' + character(len=20) :: name='c_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_csmm +end subroutine psb_c_cuda_elg_csmm diff --git a/cuda/impl/psb_c_elg_csmv.F90 b/cuda/impl/psb_c_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_c_elg_csmv.F90 rename to cuda/impl/psb_c_cuda_elg_csmv.F90 index 9e377726..00f39e8c 100644 --- a/cuda/impl/psb_c_elg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csmv + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmv #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_csmv +end subroutine psb_c_cuda_elg_csmv diff --git a/cuda/impl/psb_c_elg_csput.F90 b/cuda/impl/psb_c_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_c_elg_csput.F90 rename to cuda/impl/psb_c_cuda_elg_csput.F90 index 2a632f21..cc6fc024 100644 --- a/cuda/impl/psb_c_elg_csput.F90 +++ b/cuda/impl/psb_c_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csput_a + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_a #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='c_elg_csput_a' + character(len=20) :: name='c_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_c_elg_csput_a +end subroutine psb_c_cuda_elg_csput_a -subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csput_v - use psb_c_gpu_vect_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v + use psb_c_cuda_vect_mod #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='c_elg_csput_v' + character(len=20) :: name='c_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_c_vect_gpu) + class is (psb_c_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_c_elg_csput_v +end subroutine psb_c_cuda_elg_csput_v diff --git a/cuda/impl/psb_s_elg_from_gpu.F90 b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_elg_from_gpu.F90 rename to cuda/impl/psb_c_cuda_elg_from_gpu.F90 index d043790d..593b52be 100644 --- a/cuda/impl/psb_s_elg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_from_gpu(a,info) +subroutine psb_c_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_from_gpu + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu #else - use psb_s_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_s_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_s_elg_from_gpu +end subroutine psb_c_cuda_elg_from_gpu diff --git a/cuda/impl/psb_c_elg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_c_elg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 index 97f0f7ff..43843dc6 100644 --- a/cuda/impl/psb_c_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_inner_vect_sv + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='c_elg_inner_vect_sv' + character(len=20) :: name='c_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. complex(psb_spk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_inner_vect_sv +end subroutine psb_c_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_c_elg_mold.F90 b/cuda/impl/psb_c_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_c_elg_mold.F90 rename to cuda/impl/psb_c_cuda_elg_mold.F90 index 17cd2ce2..b428055c 100644 --- a/cuda/impl/psb_c_elg_mold.F90 +++ b/cuda/impl/psb_c_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_elg_mold(a,b,info) +subroutine psb_c_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_mold + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_mold implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_elg_mold(a,b,info) return -end subroutine psb_c_elg_mold +end subroutine psb_c_cuda_elg_mold diff --git a/cuda/impl/psb_c_elg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_c_elg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 index 40d94d36..b97530e1 100644 --- a/cuda/impl/psb_c_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_c_elg_reallocate_nz(nz,a) +subroutine psb_c_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_reallocate_nz + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_elg_reallocate_nz' + character(len=20) :: name='c_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_c_elg_reallocate_nz(nz,a) return -end subroutine psb_c_elg_reallocate_nz +end subroutine psb_c_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_c_elg_scal.F90 b/cuda/impl/psb_c_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_c_elg_scal.F90 rename to cuda/impl/psb_c_cuda_elg_scal.F90 index 63d9907e..b169451b 100644 --- a/cuda/impl/psb_c_elg_scal.F90 +++ b/cuda/impl/psb_c_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_scal(d,a,info,side) +subroutine psb_c_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scal + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_c_elg_scal(d,a,info,side) return -end subroutine psb_c_elg_scal +end subroutine psb_c_cuda_elg_scal diff --git a/cuda/impl/psb_c_elg_scals.F90 b/cuda/impl/psb_c_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_c_elg_scals.F90 rename to cuda/impl/psb_c_cuda_elg_scals.F90 index b954e0a1..d20ee568 100644 --- a/cuda/impl/psb_c_elg_scals.F90 +++ b/cuda/impl/psb_c_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_scals(d,a,info) +subroutine psb_c_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scals + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_c_elg_scals(d,a,info) return -end subroutine psb_c_elg_scals +end subroutine psb_c_cuda_elg_scals diff --git a/cuda/impl/psb_c_elg_to_gpu.F90 b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_c_elg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_elg_to_gpu.F90 index b967a59b..5ea61a41 100644 --- a/cuda/impl/psb_c_elg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_to_gpu + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_c_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_c_elg_to_gpu +end subroutine psb_c_cuda_elg_to_gpu diff --git a/cuda/impl/psb_d_elg_trim.f90 b/cuda/impl/psb_c_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_d_elg_trim.f90 rename to cuda/impl/psb_c_cuda_elg_trim.f90 index d2a2047c..483e189d 100644 --- a/cuda/impl/psb_d_elg_trim.f90 +++ b/cuda/impl/psb_c_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_elg_trim(a) +subroutine psb_c_cuda_elg_trim(a) use psb_base_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_trim + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_trim implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_d_elg_trim(a) return -end subroutine psb_d_elg_trim +end subroutine psb_c_cuda_elg_trim diff --git a/cuda/impl/psb_c_elg_vect_mv.F90 b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_c_elg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_elg_vect_mv.F90 index ec6e5b50..b89ba5a2 100644 --- a/cuda/impl/psb_c_elg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_vect_mv + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_elg_vect_mv' + character(len=20) :: name='c_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= czero) then @@ -128,4 +128,4 @@ subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_vect_mv +end subroutine psb_c_cuda_elg_vect_mv diff --git a/cuda/impl/psb_c_hdiag_csmv.F90 b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_c_hdiag_csmv.F90 rename to cuda/impl/psb_c_cuda_hdiag_csmv.F90 index 1ba58c6f..4ea2c269 100644 --- a/cuda/impl/psb_c_hdiag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_csmv + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif implicit none - class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='c_hdiag_csmv' + character(len=20) :: name='c_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hdiag_csmv +end subroutine psb_c_cuda_hdiag_csmv diff --git a/cuda/impl/psb_c_hdiag_mold.F90 b/cuda/impl/psb_c_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_c_hdiag_mold.F90 rename to cuda/impl/psb_c_cuda_hdiag_mold.F90 index bc913690..67e0b92e 100644 --- a/cuda/impl/psb_c_hdiag_mold.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_hdiag_mold(a,b,info) +subroutine psb_c_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_mold + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_mold implicit none - class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_hdiag_mold(a,b,info) return -end subroutine psb_c_hdiag_mold +end subroutine psb_c_cuda_hdiag_mold diff --git a/cuda/impl/psb_c_hdiag_to_gpu.F90 b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_c_hdiag_to_gpu.F90 rename to cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 index 565babe0..63ab178a 100644 --- a/cuda/impl/psb_c_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hdiag_to_gpu(a,info) +subroutine psb_c_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_to_gpu + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_to_gpu #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_c_hdiag_to_gpu(a,info) #endif -end subroutine psb_c_hdiag_to_gpu +end subroutine psb_c_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_c_hdiag_vect_mv.F90 b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_c_hdiag_vect_mv.F90 rename to cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 index a891a274..fb80611f 100644 --- a/cuda/impl/psb_c_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_vect_mv + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_vect_mv #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hdiag_vect_mv' + character(len=20) :: name='c_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hdiag_vect_mv +end subroutine psb_c_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_c_hlg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_c_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 index 27e5c0b6..277b974f 100644 --- a/cuda/impl/psb_c_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_allocate_mnnz + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_allocate_mnnz #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_hlg_allocate_mnnz +end subroutine psb_c_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_c_hlg_csmm.F90 b/cuda/impl/psb_c_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_c_hlg_csmm.F90 rename to cuda/impl/psb_c_cuda_hlg_csmm.F90 index c33b2dde..f351ffd0 100644 --- a/cuda/impl/psb_c_hlg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_csmm + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmm #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hlg_csmm' + character(len=20) :: name='c_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_csmm +end subroutine psb_c_cuda_hlg_csmm diff --git a/cuda/impl/psb_c_hlg_csmv.F90 b/cuda/impl/psb_c_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_c_hlg_csmv.F90 rename to cuda/impl/psb_c_cuda_hlg_csmv.F90 index 2599e44e..d39e5f51 100644 --- a/cuda/impl/psb_c_hlg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_csmv + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmv #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='c_hlg_csmv' + character(len=20) :: name='c_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_csmv +end subroutine psb_c_cuda_hlg_csmv diff --git a/cuda/impl/psb_s_hlg_from_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_s_hlg_from_gpu.F90 rename to cuda/impl/psb_c_cuda_hlg_from_gpu.F90 index 1c34b15e..f823153d 100644 --- a/cuda/impl/psb_s_hlg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_from_gpu(a,info) +subroutine psb_c_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_from_gpu + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_from_gpu #else - use psb_s_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_s_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_s_hlg_from_gpu +end subroutine psb_c_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_c_hlg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_c_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 index 0955d8a1..6202885c 100644 --- a/cuda/impl/psb_c_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_inner_vect_sv + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_inner_vect_sv #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_inner_vect_sv +end subroutine psb_c_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_c_hlg_mold.F90 b/cuda/impl/psb_c_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_c_hlg_mold.F90 rename to cuda/impl/psb_c_cuda_hlg_mold.F90 index 321111f0..85453422 100644 --- a/cuda/impl/psb_c_hlg_mold.F90 +++ b/cuda/impl/psb_c_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_hlg_mold(a,b,info) +subroutine psb_c_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_mold + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_mold implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_c_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_hlg_mold +end subroutine psb_c_cuda_hlg_mold diff --git a/cuda/impl/psb_c_hlg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_c_hlg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 index a27c3f55..848b659d 100644 --- a/cuda/impl/psb_c_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_c_hlg_reallocate_nz(nz,a) +subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_reallocate_nz + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_reallocate_nz #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_hlg_reallocate_nz' + character(len=20) :: name='c_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_c_hlg_reallocate_nz(nz,a) return -end subroutine psb_c_hlg_reallocate_nz +end subroutine psb_c_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_c_hlg_scal.F90 b/cuda/impl/psb_c_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_scal.F90 rename to cuda/impl/psb_c_cuda_hlg_scal.F90 index b2c9d30d..d768048f 100644 --- a/cuda/impl/psb_c_hlg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_scal(d,a,info,side) +subroutine psb_c_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scal + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scal #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_c_hlg_scal(d,a,info,side) return -end subroutine psb_c_hlg_scal +end subroutine psb_c_cuda_hlg_scal diff --git a/cuda/impl/psb_c_hlg_scals.F90 b/cuda/impl/psb_c_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_scals.F90 rename to cuda/impl/psb_c_cuda_hlg_scals.F90 index af2efb19..7574bf94 100644 --- a/cuda/impl/psb_c_hlg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_scals(d,a,info) +subroutine psb_c_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scals + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scals #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_c_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_hlg_scals +end subroutine psb_c_cuda_hlg_scals diff --git a/cuda/impl/psb_c_hlg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_hlg_to_gpu.F90 index 0d37bc24..d7fc8fb2 100644 --- a/cuda/impl/psb_c_hlg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_to_gpu + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_to_gpu #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_c_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_c_hlg_to_gpu +end subroutine psb_c_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_c_hlg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_hlg_vect_mv.F90 index bc4e2f56..2d7a679e 100644 --- a/cuda/impl/psb_c_hlg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_vect_mv + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_vect_mv #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hlg_vect_mv' + character(len=20) :: name='c_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_vect_mv +end subroutine psb_c_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_c_hybg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_c_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 index 5cd57fa2..eced26e0 100644 --- a/cuda/impl/psb_c_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_allocate_mnnz + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_allocate_mnnz #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_hybg_allocate_mnnz +end subroutine psb_c_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_c_hybg_csmm.F90 b/cuda/impl/psb_c_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_c_hybg_csmm.F90 rename to cuda/impl/psb_c_cuda_hybg_csmm.F90 index 7c8bb582..cc459f66 100644 --- a/cuda/impl/psb_c_hybg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_csmm + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmm #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_csmm' + character(len=20) :: name='c_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_csmm +end subroutine psb_c_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_c_hybg_csmv.F90 b/cuda/impl/psb_c_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_c_hybg_csmv.F90 rename to cuda/impl/psb_c_cuda_hybg_csmv.F90 index 5e15bc1b..ab07d756 100644 --- a/cuda/impl/psb_c_hybg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_csmv + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmv #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_csmv' + character(len=20) :: name='c_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_csmv +end subroutine psb_c_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_c_hybg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_c_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 index 20bc842d..fcaf49ff 100644 --- a/cuda/impl/psb_c_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_inner_vect_sv + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_inner_vect_sv #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_inner_vect_sv' + character(len=20) :: name='c_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= czero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_inner_vect_sv +end subroutine psb_c_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_c_hybg_mold.F90 b/cuda/impl/psb_c_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_c_hybg_mold.F90 rename to cuda/impl/psb_c_cuda_hybg_mold.F90 index 54dd24c2..6fe4c378 100644 --- a/cuda/impl/psb_c_hybg_mold.F90 +++ b/cuda/impl/psb_c_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_mold(a,b,info) +subroutine psb_c_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_mold + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_mold implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_c_hybg_mold(a,b,info) return -end subroutine psb_c_hybg_mold +end subroutine psb_c_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_c_hybg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_c_hybg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 index 3272b797..979eaad8 100644 --- a/cuda/impl/psb_c_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_reallocate_nz(nz,a) +subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_reallocate_nz + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_reallocate_nz #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_hybg_reallocate_nz' + character(len=20) :: name='c_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_c_hybg_reallocate_nz(nz,a) return -end subroutine psb_c_hybg_reallocate_nz +end subroutine psb_c_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_c_hybg_scal.F90 b/cuda/impl/psb_c_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_c_hybg_scal.F90 rename to cuda/impl/psb_c_cuda_hybg_scal.F90 index 1019f979..ac4d788e 100644 --- a/cuda/impl/psb_c_hybg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_scal(d,a,info,side) +subroutine psb_c_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scal + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scal #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_c_hybg_scal(d,a,info,side) return -end subroutine psb_c_hybg_scal +end subroutine psb_c_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_c_hybg_scals.F90 b/cuda/impl/psb_c_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_c_hybg_scals.F90 rename to cuda/impl/psb_c_cuda_hybg_scals.F90 index 1d09abbb..7def71d2 100644 --- a/cuda/impl/psb_c_hybg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_scals(d,a,info) +subroutine psb_c_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scals + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scals #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_c_hybg_scals(d,a,info) return -end subroutine psb_c_hybg_scals +end subroutine psb_c_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_c_hybg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_c_hybg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_hybg_to_gpu.F90 index 107efba9..1a77586e 100644 --- a/cuda/impl/psb_c_hybg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_to_gpu + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_to_gpu #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_c_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_c_hybg_to_gpu +end subroutine psb_c_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_c_hybg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_c_hybg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_hybg_vect_mv.F90 index 3ed0f7fd..da20ca41 100644 --- a/cuda/impl/psb_c_hybg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_vect_mv + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_vect_mv #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_vect_mv' + character(len=20) :: name='c_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= czero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_vect_mv +end subroutine psb_c_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_c_mv_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_csrg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 index d2533c2d..f80a8f87 100644 --- a/cuda/impl/psb_c_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_mv_csrg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_coo + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_coo #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_c_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_csrg_from_coo +end subroutine psb_c_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_c_mv_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 index 3e898e8f..1f23a6c2 100644 --- a/cuda/impl/psb_c_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_mv_csrg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_fmt + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_fmt #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_c_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_c_mv_csrg_from_fmt +end subroutine psb_c_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_c_mv_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_diag_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 index 34fe69b7..e20e0b0a 100644 --- a/cuda/impl/psb_c_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_mv_diag_from_coo(a,b,info) +subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_mv_diag_from_coo + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_mv_diag_from_coo #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif implicit none - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_c_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_diag_from_coo +end subroutine psb_c_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_c_mv_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_elg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 index acf7e28c..741058cd 100644 --- a/cuda/impl/psb_c_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_mv_elg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_coo + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_coo #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_c_mv_elg_from_coo(a,b,info) return -end subroutine psb_c_mv_elg_from_coo +end subroutine psb_c_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_c_mv_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_c_mv_elg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 index fb9e3cfe..b375bc63 100644 --- a/cuda/impl/psb_c_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_mv_elg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_fmt + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_fmt #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_c_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_c_mv_elg_from_fmt +end subroutine psb_c_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_c_mv_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_c_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 index 1d07bddb..8826081f 100644 --- a/cuda/impl/psb_c_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_c_mv_hdiag_from_coo(a,b,info) +subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif implicit none - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_c_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_c_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_c_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_hdiag_from_coo +end subroutine psb_c_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_c_mv_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_c_mv_hlg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 index 0fa2d72d..416bbaed 100644 --- a/cuda/impl/psb_c_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_mv_hlg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_coo #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_c_mv_hlg_from_coo(a,b,info) return -end subroutine psb_c_mv_hlg_from_coo +end subroutine psb_c_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_c_mv_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 index 0581c7d6..aafe692d 100644 --- a/cuda/impl/psb_c_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_mv_hlg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_fmt + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_fmt #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_c_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_c_mv_hlg_from_fmt +end subroutine psb_c_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_c_mv_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_hybg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 index 7aca6065..eb5ba685 100644 --- a/cuda/impl/psb_c_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_mv_hybg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_coo + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_coo #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_c_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_hybg_from_coo +end subroutine psb_c_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_c_mv_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 index 41581b85..d74e89bd 100644 --- a/cuda/impl/psb_c_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_mv_hybg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_fmt + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_fmt #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_c_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_c_mv_hybg_from_fmt +end subroutine psb_c_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_d_cp_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_cp_csrg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 index ec00007e..e3383af1 100644 --- a/cuda/impl/psb_d_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_cp_csrg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_coo + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_coo #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_d_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_csrg_from_coo +end subroutine psb_d_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_d_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 index b3aabeed..28b46c76 100644 --- a/cuda/impl/psb_d_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_cp_csrg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_fmt + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_fmt #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_d_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_d_cp_csrg_from_fmt +end subroutine psb_d_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_d_cp_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_cp_diag_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 index 06aff19d..d21bb469 100644 --- a/cuda/impl/psb_d_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_cp_diag_from_coo(a,b,info) +subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_cp_diag_from_coo + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_cp_diag_from_coo #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif implicit none - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_d_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_diag_from_coo +end subroutine psb_d_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_d_cp_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_d_cp_elg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 index 381e4bfb..a4d58297 100644 --- a/cuda/impl/psb_d_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_d_cp_elg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_coo + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_d_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_d_count_ell_from_coo -end subroutine psb_d_cp_elg_from_coo +end subroutine psb_d_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_d_cp_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_d_cp_elg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 index 9a6b6d41..31786c1b 100644 --- a/cuda/impl/psb_d_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_cp_elg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_fmt + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_fmt #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_d_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_d_cp_elg_from_fmt +end subroutine psb_d_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_d_cp_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_d_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 index 443452a1..efcf9d66 100644 --- a/cuda/impl/psb_d_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_cp_hdiag_from_coo(a,b,info) +subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif implicit none - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_d_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_d_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_d_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_hdiag_from_coo +end subroutine psb_d_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_d_cp_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_d_cp_hlg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 index 02855fef..2fc898b2 100644 --- a/cuda/impl/psb_d_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_cp_hlg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_coo #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_d_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_d_cp_hlg_from_coo +end subroutine psb_d_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_d_cp_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_d_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 index 133fbb32..0796630c 100644 --- a/cuda/impl/psb_d_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_cp_hlg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_fmt + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_fmt #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_d_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_hlg_from_fmt +end subroutine psb_d_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_d_cp_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_cp_hybg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 index a74409cb..f1f62a89 100644 --- a/cuda/impl/psb_d_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_cp_hybg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_coo + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_coo #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_d_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_hybg_from_coo +end subroutine psb_d_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_d_cp_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 index 91d59060..37c9cc42 100644 --- a/cuda/impl/psb_d_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_cp_hybg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_fmt + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_fmt #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_d_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_d_cp_hybg_from_fmt +end subroutine psb_d_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_d_csrg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_d_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 index 7d2d4470..3858672c 100644 --- a/cuda/impl/psb_d_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_allocate_mnnz + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_allocate_mnnz #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_csrg_allocate_mnnz +end subroutine psb_d_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_d_csrg_csmm.F90 b/cuda/impl/psb_d_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_d_csrg_csmm.F90 rename to cuda/impl/psb_d_cuda_csrg_csmm.F90 index 59c8343e..58251d9a 100644 --- a/cuda/impl/psb_d_csrg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_csmm + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmm #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_csrg_csmm +end subroutine psb_d_cuda_csrg_csmm diff --git a/cuda/impl/psb_d_csrg_csmv.F90 b/cuda/impl/psb_d_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_d_csrg_csmv.F90 rename to cuda/impl/psb_d_cuda_csrg_csmv.F90 index 44a6428b..269760f0 100644 --- a/cuda/impl/psb_d_csrg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_csmv + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmv #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_csrg_csmv' + character(len=20) :: name='d_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_csrg_csmv +end subroutine psb_d_cuda_csrg_csmv diff --git a/cuda/impl/psb_s_csrg_from_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_csrg_from_gpu.F90 rename to cuda/impl/psb_d_cuda_csrg_from_gpu.F90 index 23748d97..c451a99f 100644 --- a/cuda/impl/psb_s_csrg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_csrg_from_gpu(a,info) +subroutine psb_d_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_from_gpu + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_from_gpu #else - use psb_s_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_s_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_s_csrg_from_gpu +end subroutine psb_d_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_d_csrg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 index 016d63d6..60ee541f 100644 --- a/cuda/impl/psb_d_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_inner_vect_sv + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_inner_vect_sv #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='d_csrg_inner_vect_sv' + character(len=20) :: name='d_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_csrg_inner_vect_sv +end subroutine psb_d_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_d_csrg_mold.F90 b/cuda/impl/psb_d_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_d_csrg_mold.F90 rename to cuda/impl/psb_d_cuda_csrg_mold.F90 index d7288868..eec34975 100644 --- a/cuda/impl/psb_d_csrg_mold.F90 +++ b/cuda/impl/psb_d_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_csrg_mold(a,b,info) +subroutine psb_d_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_mold + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_mold implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_csrg_mold(a,b,info) return -end subroutine psb_d_csrg_mold +end subroutine psb_d_cuda_csrg_mold diff --git a/cuda/impl/psb_d_csrg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_d_csrg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 index 083091f5..dbf34958 100644 --- a/cuda/impl/psb_d_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_d_csrg_reallocate_nz(nz,a) +subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_reallocate_nz + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_reallocate_nz #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_csrg_reallocate_nz' + character(len=20) :: name='d_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_d_csrg_reallocate_nz(nz,a) return -end subroutine psb_d_csrg_reallocate_nz +end subroutine psb_d_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_d_csrg_scal.F90 b/cuda/impl/psb_d_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_scal.F90 rename to cuda/impl/psb_d_cuda_csrg_scal.F90 index 60dbaecd..73e1b9f3 100644 --- a/cuda/impl/psb_d_csrg_scal.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_d_csrg_scal(d,a,info,side) +subroutine psb_d_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scal + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scal #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_d_csrg_scal(d,a,info,side) return -end subroutine psb_d_csrg_scal +end subroutine psb_d_cuda_csrg_scal diff --git a/cuda/impl/psb_d_csrg_scals.F90 b/cuda/impl/psb_d_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_scals.F90 rename to cuda/impl/psb_d_cuda_csrg_scals.F90 index 6d4a1f40..cf8d6270 100644 --- a/cuda/impl/psb_d_csrg_scals.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_d_csrg_scals(d,a,info) +subroutine psb_d_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scals + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scals #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_d_csrg_scals(d,a,info) return -end subroutine psb_d_csrg_scals +end subroutine psb_d_cuda_csrg_scals diff --git a/cuda/impl/psb_d_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_d_csrg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index eb5d3942..a0e72cb4 100644 --- a/cuda/impl/psb_d_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_d_csrg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_to_gpu + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_to_gpu #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_d_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_d_csrg_to_gpu +end subroutine psb_d_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_d_csrg_vect_mv.F90 b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_csrg_vect_mv.F90 index f7124bbb..b828d878 100644 --- a/cuda/impl/psb_d_csrg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_vect_mv + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_vect_mv #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_csrg_vect_mv' + character(len=20) :: name='d_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_csrg_vect_mv +end subroutine psb_d_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_d_diag_csmv.F90 b/cuda/impl/psb_d_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_d_diag_csmv.F90 rename to cuda/impl/psb_d_cuda_diag_csmv.F90 index af9ad2db..8b49769e 100644 --- a/cuda/impl/psb_d_diag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_csmv + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_csmv #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='d_diag_csmv' + character(len=20) :: name='d_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_diag_csmv +end subroutine psb_d_cuda_diag_csmv diff --git a/cuda/impl/psb_d_diag_mold.F90 b/cuda/impl/psb_d_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_d_diag_mold.F90 rename to cuda/impl/psb_d_cuda_diag_mold.F90 index 4b0d066a..7cea069e 100644 --- a/cuda/impl/psb_d_diag_mold.F90 +++ b/cuda/impl/psb_d_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_diag_mold(a,b,info) +subroutine psb_d_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_mold + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_mold implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_diag_mold(a,b,info) return -end subroutine psb_d_diag_mold +end subroutine psb_d_cuda_diag_mold diff --git a/cuda/impl/psb_d_diag_to_gpu.F90 b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_diag_to_gpu.F90 rename to cuda/impl/psb_d_cuda_diag_to_gpu.F90 index de244124..4903de8a 100644 --- a/cuda/impl/psb_d_diag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_diag_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_to_gpu + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_to_gpu #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_d_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_d_diag_to_gpu +end subroutine psb_d_cuda_diag_to_gpu diff --git a/cuda/impl/psb_d_diag_vect_mv.F90 b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_diag_vect_mv.F90 rename to cuda/impl/psb_d_cuda_diag_vect_mv.F90 index 3f2f5ac6..0f23d363 100644 --- a/cuda/impl/psb_d_diag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_vect_mv + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_vect_mv #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_diag_vect_mv' + character(len=20) :: name='d_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_diag_vect_mv +end subroutine psb_d_cuda_diag_vect_mv diff --git a/cuda/impl/psb_d_dnsg_mat_impl.F90 b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_d_dnsg_mat_impl.F90 rename to cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 index a7915898..8d922d82 100644 --- a/cuda/impl/psb_d_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_vect_mv + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_vect_mv #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ real(psb_dpk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='d_dnsg_vect_mv' + character(len=20) :: name='d_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= dzero) then @@ -117,21 +117,21 @@ subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_dnsg_vect_mv +end subroutine psb_d_cuda_dnsg_vect_mv -subroutine psb_d_dnsg_mold(a,b,info) +subroutine psb_d_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_mold + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_mold #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_d_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_d_dnsg_mold(a,b,info) return -end subroutine psb_d_dnsg_mold +end subroutine psb_d_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_d_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end subroutine psb_d_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_d_dnsg_to_gpu(a,info) +subroutine psb_d_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_to_gpu + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_to_gpu #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='d_dnsg_to_gpu' + character(len=20) :: name='d_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_d_dnsg_to_gpu(a,info) return -end subroutine psb_d_dnsg_to_gpu +end subroutine psb_d_cuda_dnsg_to_gpu -subroutine psb_d_cp_dnsg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_coo + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_coo #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_dnsg_cp_from_coo' + character(len=20) :: name='d_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_d_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_d_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_d_cp_dnsg_from_coo +end subroutine psb_d_cuda_cp_dnsg_from_coo -subroutine psb_d_cp_dnsg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_fmt + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_fmt #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_d_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_dnsg_cp_from_fmt' + character(len=20) :: name='d_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_d_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_d_cp_dnsg_from_fmt +end subroutine psb_d_cuda_cp_dnsg_from_fmt -subroutine psb_d_mv_dnsg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_coo + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_coo #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='d_dnsg_mv_from_coo' + character(len=20) :: name='d_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_d_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_d_mv_dnsg_from_coo +end subroutine psb_d_cuda_mv_dnsg_from_coo -subroutine psb_d_mv_dnsg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_fmt + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_fmt #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_d_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_dnsg_cp_from_fmt' + character(len=20) :: name='d_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_d_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_d_mv_dnsg_from_fmt +end subroutine psb_d_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_d_elg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_d_elg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 index 105f5617..b9308514 100644 --- a/cuda/impl/psb_d_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_allocate_mnnz + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_allocate_mnnz #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_elg_allocate_mnnz +end subroutine psb_d_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_s_elg_asb.f90 b/cuda/impl/psb_d_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_s_elg_asb.f90 rename to cuda/impl/psb_d_cuda_elg_asb.f90 index 190be710..c158ccde 100644 --- a/cuda/impl/psb_s_elg_asb.f90 +++ b/cuda/impl/psb_d_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_s_elg_asb(a) +subroutine psb_d_cuda_elg_asb(a) use psb_base_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_asb + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_asb implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_s_elg_asb(a) return -end subroutine psb_s_elg_asb +end subroutine psb_d_cuda_elg_asb diff --git a/cuda/impl/psb_d_elg_csmm.F90 b/cuda/impl/psb_d_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_d_elg_csmm.F90 rename to cuda/impl/psb_d_cuda_elg_csmm.F90 index add9c3b2..2d9883fa 100644 --- a/cuda/impl/psb_d_elg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csmm + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmm #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_elg_csmm' + character(len=20) :: name='d_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_csmm +end subroutine psb_d_cuda_elg_csmm diff --git a/cuda/impl/psb_d_elg_csmv.F90 b/cuda/impl/psb_d_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_d_elg_csmv.F90 rename to cuda/impl/psb_d_cuda_elg_csmv.F90 index 5237fb75..6420e28d 100644 --- a/cuda/impl/psb_d_elg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csmv + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmv #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_csmv +end subroutine psb_d_cuda_elg_csmv diff --git a/cuda/impl/psb_d_elg_csput.F90 b/cuda/impl/psb_d_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_d_elg_csput.F90 rename to cuda/impl/psb_d_cuda_elg_csput.F90 index 107c69d0..19d26c43 100644 --- a/cuda/impl/psb_d_elg_csput.F90 +++ b/cuda/impl/psb_d_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csput_a + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_a #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='d_elg_csput_a' + character(len=20) :: name='d_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_d_elg_csput_a +end subroutine psb_d_cuda_elg_csput_a -subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csput_v - use psb_d_gpu_vect_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_v + use psb_d_cuda_vect_mod #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='d_elg_csput_v' + character(len=20) :: name='d_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_d_vect_gpu) + class is (psb_d_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_d_elg_csput_v +end subroutine psb_d_cuda_elg_csput_v diff --git a/cuda/impl/psb_d_elg_from_gpu.F90 b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_elg_from_gpu.F90 rename to cuda/impl/psb_d_cuda_elg_from_gpu.F90 index c1da9584..b532a83c 100644 --- a/cuda/impl/psb_d_elg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_from_gpu(a,info) +subroutine psb_d_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_from_gpu + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_from_gpu #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_d_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_d_elg_from_gpu +end subroutine psb_d_cuda_elg_from_gpu diff --git a/cuda/impl/psb_d_elg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_d_elg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 index 333946bf..c262969f 100644 --- a/cuda/impl/psb_d_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_inner_vect_sv + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_inner_vect_sv #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='d_elg_inner_vect_sv' + character(len=20) :: name='d_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. real(psb_dpk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_inner_vect_sv +end subroutine psb_d_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_d_elg_mold.F90 b/cuda/impl/psb_d_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_d_elg_mold.F90 rename to cuda/impl/psb_d_cuda_elg_mold.F90 index 3fd6d071..f887f96f 100644 --- a/cuda/impl/psb_d_elg_mold.F90 +++ b/cuda/impl/psb_d_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_elg_mold(a,b,info) +subroutine psb_d_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_mold + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_mold implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_elg_mold(a,b,info) return -end subroutine psb_d_elg_mold +end subroutine psb_d_cuda_elg_mold diff --git a/cuda/impl/psb_d_elg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_d_elg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 index 70b3705c..66c583e1 100644 --- a/cuda/impl/psb_d_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_d_elg_reallocate_nz(nz,a) +subroutine psb_d_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_reallocate_nz + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_reallocate_nz #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_elg_reallocate_nz' + character(len=20) :: name='d_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_d_elg_reallocate_nz(nz,a) return -end subroutine psb_d_elg_reallocate_nz +end subroutine psb_d_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_d_elg_scal.F90 b/cuda/impl/psb_d_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_d_elg_scal.F90 rename to cuda/impl/psb_d_cuda_elg_scal.F90 index 53ab82d7..7aa21c93 100644 --- a/cuda/impl/psb_d_elg_scal.F90 +++ b/cuda/impl/psb_d_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_scal(d,a,info,side) +subroutine psb_d_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scal + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scal #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_d_elg_scal(d,a,info,side) return -end subroutine psb_d_elg_scal +end subroutine psb_d_cuda_elg_scal diff --git a/cuda/impl/psb_d_elg_scals.F90 b/cuda/impl/psb_d_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_d_elg_scals.F90 rename to cuda/impl/psb_d_cuda_elg_scals.F90 index f85780ce..1950b366 100644 --- a/cuda/impl/psb_d_elg_scals.F90 +++ b/cuda/impl/psb_d_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_scals(d,a,info) +subroutine psb_d_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scals + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scals #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_d_elg_scals(d,a,info) return -end subroutine psb_d_elg_scals +end subroutine psb_d_cuda_elg_scals diff --git a/cuda/impl/psb_d_elg_to_gpu.F90 b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_d_elg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_elg_to_gpu.F90 index 28e61606..b589ec2d 100644 --- a/cuda/impl/psb_d_elg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_to_gpu + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_to_gpu #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_d_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_d_elg_to_gpu +end subroutine psb_d_cuda_elg_to_gpu diff --git a/cuda/impl/psb_s_elg_trim.f90 b/cuda/impl/psb_d_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_s_elg_trim.f90 rename to cuda/impl/psb_d_cuda_elg_trim.f90 index f3bd3b2f..be573c8c 100644 --- a/cuda/impl/psb_s_elg_trim.f90 +++ b/cuda/impl/psb_d_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_elg_trim(a) +subroutine psb_d_cuda_elg_trim(a) use psb_base_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_trim + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_trim implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_s_elg_trim(a) return -end subroutine psb_s_elg_trim +end subroutine psb_d_cuda_elg_trim diff --git a/cuda/impl/psb_d_elg_vect_mv.F90 b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_d_elg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_elg_vect_mv.F90 index e46f84da..1be57d22 100644 --- a/cuda/impl/psb_d_elg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_vect_mv + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_vect_mv #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_elg_vect_mv' + character(len=20) :: name='d_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= dzero) then @@ -128,4 +128,4 @@ subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_vect_mv +end subroutine psb_d_cuda_elg_vect_mv diff --git a/cuda/impl/psb_d_hdiag_csmv.F90 b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_d_hdiag_csmv.F90 rename to cuda/impl/psb_d_cuda_hdiag_csmv.F90 index 6f6bcedf..4bcd6e7a 100644 --- a/cuda/impl/psb_d_hdiag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_csmv + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_csmv #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif implicit none - class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='d_hdiag_csmv' + character(len=20) :: name='d_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hdiag_csmv +end subroutine psb_d_cuda_hdiag_csmv diff --git a/cuda/impl/psb_d_hdiag_mold.F90 b/cuda/impl/psb_d_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_d_hdiag_mold.F90 rename to cuda/impl/psb_d_cuda_hdiag_mold.F90 index b6c254e9..c5028c07 100644 --- a/cuda/impl/psb_d_hdiag_mold.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_hdiag_mold(a,b,info) +subroutine psb_d_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_mold + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_mold implicit none - class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_hdiag_mold(a,b,info) return -end subroutine psb_d_hdiag_mold +end subroutine psb_d_cuda_hdiag_mold diff --git a/cuda/impl/psb_s_hdiag_to_gpu.F90 b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_s_hdiag_to_gpu.F90 rename to cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 index ade1c080..ca79b9fa 100644 --- a/cuda/impl/psb_s_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hdiag_to_gpu(a,info) +subroutine psb_d_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_to_gpu + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_to_gpu #else - use psb_s_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_s_hdiag_to_gpu(a,info) #endif -end subroutine psb_s_hdiag_to_gpu +end subroutine psb_d_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_d_hdiag_vect_mv.F90 b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_hdiag_vect_mv.F90 rename to cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 index db7ec9c6..74233f90 100644 --- a/cuda/impl/psb_d_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_vect_mv + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_vect_mv #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hdiag_vect_mv' + character(len=20) :: name='d_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hdiag_vect_mv +end subroutine psb_d_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_d_hlg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_d_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 index 6f327e81..3382327f 100644 --- a/cuda/impl/psb_d_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_allocate_mnnz + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_allocate_mnnz #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_hlg_allocate_mnnz +end subroutine psb_d_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_d_hlg_csmm.F90 b/cuda/impl/psb_d_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_d_hlg_csmm.F90 rename to cuda/impl/psb_d_cuda_hlg_csmm.F90 index 120f3e06..a223aace 100644 --- a/cuda/impl/psb_d_hlg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_csmm + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmm #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hlg_csmm' + character(len=20) :: name='d_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_csmm +end subroutine psb_d_cuda_hlg_csmm diff --git a/cuda/impl/psb_d_hlg_csmv.F90 b/cuda/impl/psb_d_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_d_hlg_csmv.F90 rename to cuda/impl/psb_d_cuda_hlg_csmv.F90 index 4584826d..04779296 100644 --- a/cuda/impl/psb_d_hlg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_csmv + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmv #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='d_hlg_csmv' + character(len=20) :: name='d_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_csmv +end subroutine psb_d_cuda_hlg_csmv diff --git a/cuda/impl/psb_d_hlg_from_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_d_hlg_from_gpu.F90 rename to cuda/impl/psb_d_cuda_hlg_from_gpu.F90 index eec714f4..7c1a2de8 100644 --- a/cuda/impl/psb_d_hlg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_from_gpu(a,info) +subroutine psb_d_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_from_gpu + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_from_gpu #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_d_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_d_hlg_from_gpu +end subroutine psb_d_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_d_hlg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_d_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 index 0ad867a3..c6bd68b5 100644 --- a/cuda/impl/psb_d_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_inner_vect_sv + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_inner_vect_sv #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_inner_vect_sv +end subroutine psb_d_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_d_hlg_mold.F90 b/cuda/impl/psb_d_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_d_hlg_mold.F90 rename to cuda/impl/psb_d_cuda_hlg_mold.F90 index 3ce9f33a..dddce134 100644 --- a/cuda/impl/psb_d_hlg_mold.F90 +++ b/cuda/impl/psb_d_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_hlg_mold(a,b,info) +subroutine psb_d_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_mold + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_mold implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_d_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_hlg_mold +end subroutine psb_d_cuda_hlg_mold diff --git a/cuda/impl/psb_d_hlg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_d_hlg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 index c9fa4771..aa2954d6 100644 --- a/cuda/impl/psb_d_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_d_hlg_reallocate_nz(nz,a) +subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_reallocate_nz + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_reallocate_nz #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_hlg_reallocate_nz' + character(len=20) :: name='d_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_d_hlg_reallocate_nz(nz,a) return -end subroutine psb_d_hlg_reallocate_nz +end subroutine psb_d_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_d_hlg_scal.F90 b/cuda/impl/psb_d_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_scal.F90 rename to cuda/impl/psb_d_cuda_hlg_scal.F90 index b487303d..3cbfada0 100644 --- a/cuda/impl/psb_d_hlg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_scal(d,a,info,side) +subroutine psb_d_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scal + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scal #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_d_hlg_scal(d,a,info,side) return -end subroutine psb_d_hlg_scal +end subroutine psb_d_cuda_hlg_scal diff --git a/cuda/impl/psb_d_hlg_scals.F90 b/cuda/impl/psb_d_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_scals.F90 rename to cuda/impl/psb_d_cuda_hlg_scals.F90 index e3f676e9..1ddf764f 100644 --- a/cuda/impl/psb_d_hlg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_scals(d,a,info) +subroutine psb_d_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scals + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scals #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_d_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_hlg_scals +end subroutine psb_d_cuda_hlg_scals diff --git a/cuda/impl/psb_d_hlg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_hlg_to_gpu.F90 index 5e3b3558..82737315 100644 --- a/cuda/impl/psb_d_hlg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_to_gpu + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_to_gpu #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_d_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_d_hlg_to_gpu +end subroutine psb_d_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_d_hlg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_hlg_vect_mv.F90 index cd5e95e5..9d0741c4 100644 --- a/cuda/impl/psb_d_hlg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_vect_mv + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_vect_mv #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hlg_vect_mv' + character(len=20) :: name='d_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_vect_mv +end subroutine psb_d_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_d_hybg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_d_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 index 1565a719..b0bff6c0 100644 --- a/cuda/impl/psb_d_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_allocate_mnnz + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_allocate_mnnz #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_hybg_allocate_mnnz +end subroutine psb_d_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_d_hybg_csmm.F90 b/cuda/impl/psb_d_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_d_hybg_csmm.F90 rename to cuda/impl/psb_d_cuda_hybg_csmm.F90 index abc0e0c2..3fcfd17f 100644 --- a/cuda/impl/psb_d_hybg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_csmm + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmm #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_csmm' + character(len=20) :: name='d_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_csmm +end subroutine psb_d_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_d_hybg_csmv.F90 b/cuda/impl/psb_d_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_d_hybg_csmv.F90 rename to cuda/impl/psb_d_cuda_hybg_csmv.F90 index c636ec8a..5e06f633 100644 --- a/cuda/impl/psb_d_hybg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_csmv + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmv #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_csmv' + character(len=20) :: name='d_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_csmv +end subroutine psb_d_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_d_hybg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_d_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 index 82b536ca..a30c1abe 100644 --- a/cuda/impl/psb_d_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_inner_vect_sv + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_inner_vect_sv #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_inner_vect_sv' + character(len=20) :: name='d_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_inner_vect_sv +end subroutine psb_d_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_d_hybg_mold.F90 b/cuda/impl/psb_d_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_d_hybg_mold.F90 rename to cuda/impl/psb_d_cuda_hybg_mold.F90 index 27390db0..fa239696 100644 --- a/cuda/impl/psb_d_hybg_mold.F90 +++ b/cuda/impl/psb_d_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_mold(a,b,info) +subroutine psb_d_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_mold + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_mold implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_d_hybg_mold(a,b,info) return -end subroutine psb_d_hybg_mold +end subroutine psb_d_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_d_hybg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_d_hybg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 index 537101e9..cadce8d3 100644 --- a/cuda/impl/psb_d_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_reallocate_nz(nz,a) +subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_reallocate_nz + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_reallocate_nz #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_hybg_reallocate_nz' + character(len=20) :: name='d_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_d_hybg_reallocate_nz(nz,a) return -end subroutine psb_d_hybg_reallocate_nz +end subroutine psb_d_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_d_hybg_scal.F90 b/cuda/impl/psb_d_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_d_hybg_scal.F90 rename to cuda/impl/psb_d_cuda_hybg_scal.F90 index 32ef2da0..126e25cb 100644 --- a/cuda/impl/psb_d_hybg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_scal(d,a,info,side) +subroutine psb_d_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scal + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scal #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_d_hybg_scal(d,a,info,side) return -end subroutine psb_d_hybg_scal +end subroutine psb_d_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_d_hybg_scals.F90 b/cuda/impl/psb_d_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_d_hybg_scals.F90 rename to cuda/impl/psb_d_cuda_hybg_scals.F90 index 8c38328a..88b7e05c 100644 --- a/cuda/impl/psb_d_hybg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_scals(d,a,info) +subroutine psb_d_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scals + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scals #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_d_hybg_scals(d,a,info) return -end subroutine psb_d_hybg_scals +end subroutine psb_d_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_d_hybg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_d_hybg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_hybg_to_gpu.F90 index 33bf55b8..d94a75c7 100644 --- a/cuda/impl/psb_d_hybg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_to_gpu + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_to_gpu #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_d_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_d_hybg_to_gpu +end subroutine psb_d_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_d_hybg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_hybg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_hybg_vect_mv.F90 index d9653a48..9d0aedb7 100644 --- a/cuda/impl/psb_d_hybg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_vect_mv + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_vect_mv #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_vect_mv' + character(len=20) :: name='d_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_vect_mv +end subroutine psb_d_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_d_mv_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_csrg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 index 8c59e6d1..18e7c636 100644 --- a/cuda/impl/psb_d_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_mv_csrg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_coo + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_coo #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_d_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_csrg_from_coo +end subroutine psb_d_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_d_mv_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 index 30c133e4..837c78c1 100644 --- a/cuda/impl/psb_d_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_mv_csrg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_fmt + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_fmt #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_d_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_d_mv_csrg_from_fmt +end subroutine psb_d_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_d_mv_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_diag_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 index f37a5523..8d33c459 100644 --- a/cuda/impl/psb_d_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_mv_diag_from_coo(a,b,info) +subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_mv_diag_from_coo + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_mv_diag_from_coo #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif implicit none - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_d_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_diag_from_coo +end subroutine psb_d_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_d_mv_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_elg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 index 73216cfa..ad9e7f10 100644 --- a/cuda/impl/psb_d_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_mv_elg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_coo + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_coo #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_d_mv_elg_from_coo(a,b,info) return -end subroutine psb_d_mv_elg_from_coo +end subroutine psb_d_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_d_mv_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_d_mv_elg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 index 5038c50e..9cdf790e 100644 --- a/cuda/impl/psb_d_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_mv_elg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_fmt + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_fmt #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_d_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_d_mv_elg_from_fmt +end subroutine psb_d_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_d_mv_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_d_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 index ee0e983f..aff5e0c0 100644 --- a/cuda/impl/psb_d_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_d_mv_hdiag_from_coo(a,b,info) +subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif implicit none - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_d_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_d_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_d_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_hdiag_from_coo +end subroutine psb_d_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_d_mv_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_d_mv_hlg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 index fe030415..a2b358c4 100644 --- a/cuda/impl/psb_d_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_mv_hlg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_coo #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_d_mv_hlg_from_coo(a,b,info) return -end subroutine psb_d_mv_hlg_from_coo +end subroutine psb_d_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_d_mv_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 index e538b017..130d88c2 100644 --- a/cuda/impl/psb_d_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_mv_hlg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_fmt + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_fmt #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_d_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_d_mv_hlg_from_fmt +end subroutine psb_d_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_d_mv_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_hybg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 index 4fe76c72..8b0ad032 100644 --- a/cuda/impl/psb_d_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_mv_hybg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_coo + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_coo #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_d_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_hybg_from_coo +end subroutine psb_d_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_d_mv_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 index 454533d0..71badfc5 100644 --- a/cuda/impl/psb_d_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_mv_hybg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_fmt + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_fmt #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_d_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_d_mv_hybg_from_fmt +end subroutine psb_d_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_s_cp_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_cp_csrg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 index 4a714d41..b7bebc95 100644 --- a/cuda/impl/psb_s_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_cp_csrg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_coo + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_coo #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_s_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_csrg_from_coo +end subroutine psb_s_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_s_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 index 962a8c9d..7ab9283d 100644 --- a/cuda/impl/psb_s_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_cp_csrg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_fmt + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_fmt #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_s_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_s_cp_csrg_from_fmt +end subroutine psb_s_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_s_cp_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_cp_diag_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 index 6b105ef2..9f038a09 100644 --- a/cuda/impl/psb_s_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_cp_diag_from_coo(a,b,info) +subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_cp_diag_from_coo + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_cp_diag_from_coo #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif implicit none - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_s_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_diag_from_coo +end subroutine psb_s_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_s_cp_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_s_cp_elg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 index af8c7d28..f6e1ba42 100644 --- a/cuda/impl/psb_s_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_s_cp_elg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_coo + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_s_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_s_count_ell_from_coo -end subroutine psb_s_cp_elg_from_coo +end subroutine psb_s_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_s_cp_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_s_cp_elg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 index c3d973e1..0c811426 100644 --- a/cuda/impl/psb_s_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_cp_elg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_fmt + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_fmt #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_s_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_s_cp_elg_from_fmt +end subroutine psb_s_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_s_cp_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_s_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 index 0509706d..07b56fa6 100644 --- a/cuda/impl/psb_s_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_cp_hdiag_from_coo(a,b,info) +subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif implicit none - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_s_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_s_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_s_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_hdiag_from_coo +end subroutine psb_s_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_s_cp_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_s_cp_hlg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 index 5988c8dd..055fa046 100644 --- a/cuda/impl/psb_s_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_cp_hlg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_coo #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_s_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_s_cp_hlg_from_coo +end subroutine psb_s_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_s_cp_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_s_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 index 41c20866..b49be761 100644 --- a/cuda/impl/psb_s_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_cp_hlg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_fmt + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_fmt #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_s_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_hlg_from_fmt +end subroutine psb_s_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_s_cp_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_cp_hybg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 index 92dc4a68..ab135944 100644 --- a/cuda/impl/psb_s_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_cp_hybg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_coo + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_coo #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_s_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_hybg_from_coo +end subroutine psb_s_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_s_cp_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 index 53143776..62a54759 100644 --- a/cuda/impl/psb_s_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_cp_hybg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_fmt + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_fmt #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_s_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_s_cp_hybg_from_fmt +end subroutine psb_s_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_s_csrg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_s_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 index e93452d2..53ca8f12 100644 --- a/cuda/impl/psb_s_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_allocate_mnnz + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_allocate_mnnz #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_csrg_allocate_mnnz +end subroutine psb_s_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_s_csrg_csmm.F90 b/cuda/impl/psb_s_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_s_csrg_csmm.F90 rename to cuda/impl/psb_s_cuda_csrg_csmm.F90 index 55087053..c8ff4a9e 100644 --- a/cuda/impl/psb_s_csrg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_csmm + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmm #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_csrg_csmm +end subroutine psb_s_cuda_csrg_csmm diff --git a/cuda/impl/psb_s_csrg_csmv.F90 b/cuda/impl/psb_s_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_s_csrg_csmv.F90 rename to cuda/impl/psb_s_cuda_csrg_csmv.F90 index 42520843..72658c28 100644 --- a/cuda/impl/psb_s_csrg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_csmv + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmv #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_csrg_csmv' + character(len=20) :: name='s_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_csrg_csmv +end subroutine psb_s_cuda_csrg_csmv diff --git a/cuda/impl/psb_z_csrg_from_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_csrg_from_gpu.F90 rename to cuda/impl/psb_s_cuda_csrg_from_gpu.F90 index f8a6951a..7811f746 100644 --- a/cuda/impl/psb_z_csrg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_csrg_from_gpu(a,info) +subroutine psb_s_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_from_gpu + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_from_gpu #else - use psb_z_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_z_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_z_csrg_from_gpu +end subroutine psb_s_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_s_csrg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 index 133a6350..7f9965d8 100644 --- a/cuda/impl/psb_s_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_inner_vect_sv + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_inner_vect_sv #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='s_csrg_inner_vect_sv' + character(len=20) :: name='s_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_csrg_inner_vect_sv +end subroutine psb_s_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_s_csrg_mold.F90 b/cuda/impl/psb_s_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_s_csrg_mold.F90 rename to cuda/impl/psb_s_cuda_csrg_mold.F90 index 6ac4cc3d..5e33850b 100644 --- a/cuda/impl/psb_s_csrg_mold.F90 +++ b/cuda/impl/psb_s_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_csrg_mold(a,b,info) +subroutine psb_s_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_mold + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_mold implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_csrg_mold(a,b,info) return -end subroutine psb_s_csrg_mold +end subroutine psb_s_cuda_csrg_mold diff --git a/cuda/impl/psb_s_csrg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_s_csrg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 index dd9a50d0..fed3b0e7 100644 --- a/cuda/impl/psb_s_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_s_csrg_reallocate_nz(nz,a) +subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_reallocate_nz + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_reallocate_nz #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_csrg_reallocate_nz' + character(len=20) :: name='s_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_s_csrg_reallocate_nz(nz,a) return -end subroutine psb_s_csrg_reallocate_nz +end subroutine psb_s_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_s_csrg_scal.F90 b/cuda/impl/psb_s_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_scal.F90 rename to cuda/impl/psb_s_cuda_csrg_scal.F90 index 5e0fbcf0..826ab2dd 100644 --- a/cuda/impl/psb_s_csrg_scal.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_s_csrg_scal(d,a,info,side) +subroutine psb_s_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scal + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scal #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_s_csrg_scal(d,a,info,side) return -end subroutine psb_s_csrg_scal +end subroutine psb_s_cuda_csrg_scal diff --git a/cuda/impl/psb_s_csrg_scals.F90 b/cuda/impl/psb_s_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_scals.F90 rename to cuda/impl/psb_s_cuda_csrg_scals.F90 index 54b299a1..04f4c29a 100644 --- a/cuda/impl/psb_s_csrg_scals.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_s_csrg_scals(d,a,info) +subroutine psb_s_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scals + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scals #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_s_csrg_scals(d,a,info) return -end subroutine psb_s_csrg_scals +end subroutine psb_s_cuda_csrg_scals diff --git a/cuda/impl/psb_s_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_s_csrg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index f90ae4ea..eadca5df 100644 --- a/cuda/impl/psb_s_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_s_csrg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_to_gpu + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_to_gpu #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_s_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_s_csrg_to_gpu +end subroutine psb_s_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_s_csrg_vect_mv.F90 b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_csrg_vect_mv.F90 index ff88bf89..38e2dfc0 100644 --- a/cuda/impl/psb_s_csrg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_vect_mv + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_vect_mv #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_csrg_vect_mv' + character(len=20) :: name='s_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= szero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_csrg_vect_mv +end subroutine psb_s_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_s_diag_csmv.F90 b/cuda/impl/psb_s_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_s_diag_csmv.F90 rename to cuda/impl/psb_s_cuda_diag_csmv.F90 index 4cf14d12..214cf6f8 100644 --- a/cuda/impl/psb_s_diag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_csmv + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_csmv #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='s_diag_csmv' + character(len=20) :: name='s_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_diag_csmv +end subroutine psb_s_cuda_diag_csmv diff --git a/cuda/impl/psb_s_diag_mold.F90 b/cuda/impl/psb_s_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_s_diag_mold.F90 rename to cuda/impl/psb_s_cuda_diag_mold.F90 index a7690f62..9e6c58a6 100644 --- a/cuda/impl/psb_s_diag_mold.F90 +++ b/cuda/impl/psb_s_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_diag_mold(a,b,info) +subroutine psb_s_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_mold + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_mold implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_diag_mold(a,b,info) return -end subroutine psb_s_diag_mold +end subroutine psb_s_cuda_diag_mold diff --git a/cuda/impl/psb_s_diag_to_gpu.F90 b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_diag_to_gpu.F90 rename to cuda/impl/psb_s_cuda_diag_to_gpu.F90 index bb09b127..c1ee7401 100644 --- a/cuda/impl/psb_s_diag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_diag_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_to_gpu + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_to_gpu #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_s_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_s_diag_to_gpu +end subroutine psb_s_cuda_diag_to_gpu diff --git a/cuda/impl/psb_s_diag_vect_mv.F90 b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_diag_vect_mv.F90 rename to cuda/impl/psb_s_cuda_diag_vect_mv.F90 index 31976247..ab655b7c 100644 --- a/cuda/impl/psb_s_diag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_vect_mv + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_vect_mv #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_diag_vect_mv' + character(len=20) :: name='s_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_diag_vect_mv +end subroutine psb_s_cuda_diag_vect_mv diff --git a/cuda/impl/psb_s_dnsg_mat_impl.F90 b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_s_dnsg_mat_impl.F90 rename to cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 index 13c58985..861724aa 100644 --- a/cuda/impl/psb_s_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_vect_mv + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_vect_mv #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ real(psb_spk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='s_dnsg_vect_mv' + character(len=20) :: name='s_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= szero) then @@ -117,21 +117,21 @@ subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_dnsg_vect_mv +end subroutine psb_s_cuda_dnsg_vect_mv -subroutine psb_s_dnsg_mold(a,b,info) +subroutine psb_s_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_mold + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_mold #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_s_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_s_dnsg_mold(a,b,info) return -end subroutine psb_s_dnsg_mold +end subroutine psb_s_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_s_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end subroutine psb_s_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_s_dnsg_to_gpu(a,info) +subroutine psb_s_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_to_gpu + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_to_gpu #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='s_dnsg_to_gpu' + character(len=20) :: name='s_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_s_dnsg_to_gpu(a,info) return -end subroutine psb_s_dnsg_to_gpu +end subroutine psb_s_cuda_dnsg_to_gpu -subroutine psb_s_cp_dnsg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_coo + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_coo #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_dnsg_cp_from_coo' + character(len=20) :: name='s_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_s_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_s_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_s_cp_dnsg_from_coo +end subroutine psb_s_cuda_cp_dnsg_from_coo -subroutine psb_s_cp_dnsg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_fmt + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_fmt #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_s_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_dnsg_cp_from_fmt' + character(len=20) :: name='s_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_s_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_s_cp_dnsg_from_fmt +end subroutine psb_s_cuda_cp_dnsg_from_fmt -subroutine psb_s_mv_dnsg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_coo + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_coo #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='s_dnsg_mv_from_coo' + character(len=20) :: name='s_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_s_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_s_mv_dnsg_from_coo +end subroutine psb_s_cuda_mv_dnsg_from_coo -subroutine psb_s_mv_dnsg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_fmt + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_fmt #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_s_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_dnsg_cp_from_fmt' + character(len=20) :: name='s_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_s_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_s_mv_dnsg_from_fmt +end subroutine psb_s_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_s_elg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_s_elg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 index f3b1d743..63c41644 100644 --- a/cuda/impl/psb_s_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_allocate_mnnz + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_allocate_mnnz #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_elg_allocate_mnnz +end subroutine psb_s_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_c_elg_asb.f90 b/cuda/impl/psb_s_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_c_elg_asb.f90 rename to cuda/impl/psb_s_cuda_elg_asb.f90 index f2a8c641..0d53c26a 100644 --- a/cuda/impl/psb_c_elg_asb.f90 +++ b/cuda/impl/psb_s_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_c_elg_asb(a) +subroutine psb_s_cuda_elg_asb(a) use psb_base_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_asb + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_asb implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_c_elg_asb(a) return -end subroutine psb_c_elg_asb +end subroutine psb_s_cuda_elg_asb diff --git a/cuda/impl/psb_s_elg_csmm.F90 b/cuda/impl/psb_s_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_s_elg_csmm.F90 rename to cuda/impl/psb_s_cuda_elg_csmm.F90 index 8bda23e3..e7f88a2e 100644 --- a/cuda/impl/psb_s_elg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csmm + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmm #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_elg_csmm' + character(len=20) :: name='s_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_csmm +end subroutine psb_s_cuda_elg_csmm diff --git a/cuda/impl/psb_s_elg_csmv.F90 b/cuda/impl/psb_s_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_s_elg_csmv.F90 rename to cuda/impl/psb_s_cuda_elg_csmv.F90 index 29e345c2..1844d338 100644 --- a/cuda/impl/psb_s_elg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csmv + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmv #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_csmv +end subroutine psb_s_cuda_elg_csmv diff --git a/cuda/impl/psb_s_elg_csput.F90 b/cuda/impl/psb_s_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_s_elg_csput.F90 rename to cuda/impl/psb_s_cuda_elg_csput.F90 index ff2b0ff3..036eabb2 100644 --- a/cuda/impl/psb_s_elg_csput.F90 +++ b/cuda/impl/psb_s_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csput_a + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_a #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='s_elg_csput_a' + character(len=20) :: name='s_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_s_elg_csput_a +end subroutine psb_s_cuda_elg_csput_a -subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csput_v - use psb_s_gpu_vect_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_v + use psb_s_cuda_vect_mod #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='s_elg_csput_v' + character(len=20) :: name='s_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_s_vect_gpu) + class is (psb_s_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_s_elg_csput_v +end subroutine psb_s_cuda_elg_csput_v diff --git a/cuda/impl/psb_c_elg_from_gpu.F90 b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_elg_from_gpu.F90 rename to cuda/impl/psb_s_cuda_elg_from_gpu.F90 index eda65380..bdc55790 100644 --- a/cuda/impl/psb_c_elg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_from_gpu(a,info) +subroutine psb_s_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_from_gpu + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_from_gpu #else - use psb_c_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_c_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_c_elg_from_gpu +end subroutine psb_s_cuda_elg_from_gpu diff --git a/cuda/impl/psb_s_elg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_s_elg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 index 83c79cf3..79e546f5 100644 --- a/cuda/impl/psb_s_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_inner_vect_sv + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_inner_vect_sv #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='s_elg_inner_vect_sv' + character(len=20) :: name='s_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. real(psb_spk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_inner_vect_sv +end subroutine psb_s_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_s_elg_mold.F90 b/cuda/impl/psb_s_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_s_elg_mold.F90 rename to cuda/impl/psb_s_cuda_elg_mold.F90 index a481d605..dc8730bb 100644 --- a/cuda/impl/psb_s_elg_mold.F90 +++ b/cuda/impl/psb_s_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_elg_mold(a,b,info) +subroutine psb_s_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_mold + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_mold implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_elg_mold(a,b,info) return -end subroutine psb_s_elg_mold +end subroutine psb_s_cuda_elg_mold diff --git a/cuda/impl/psb_s_elg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_s_elg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 index 22916852..3f34fcec 100644 --- a/cuda/impl/psb_s_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_s_elg_reallocate_nz(nz,a) +subroutine psb_s_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_reallocate_nz + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_reallocate_nz #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_elg_reallocate_nz' + character(len=20) :: name='s_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_s_elg_reallocate_nz(nz,a) return -end subroutine psb_s_elg_reallocate_nz +end subroutine psb_s_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_s_elg_scal.F90 b/cuda/impl/psb_s_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_s_elg_scal.F90 rename to cuda/impl/psb_s_cuda_elg_scal.F90 index 913ae47e..cd6e1a5b 100644 --- a/cuda/impl/psb_s_elg_scal.F90 +++ b/cuda/impl/psb_s_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_scal(d,a,info,side) +subroutine psb_s_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scal + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scal #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_s_elg_scal(d,a,info,side) return -end subroutine psb_s_elg_scal +end subroutine psb_s_cuda_elg_scal diff --git a/cuda/impl/psb_s_elg_scals.F90 b/cuda/impl/psb_s_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_s_elg_scals.F90 rename to cuda/impl/psb_s_cuda_elg_scals.F90 index 8261fc94..4ee8a64d 100644 --- a/cuda/impl/psb_s_elg_scals.F90 +++ b/cuda/impl/psb_s_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_scals(d,a,info) +subroutine psb_s_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scals + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scals #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_s_elg_scals(d,a,info) return -end subroutine psb_s_elg_scals +end subroutine psb_s_cuda_elg_scals diff --git a/cuda/impl/psb_s_elg_to_gpu.F90 b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_s_elg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_elg_to_gpu.F90 index bf86343b..7d04d2b0 100644 --- a/cuda/impl/psb_s_elg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_to_gpu + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_to_gpu #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_s_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_s_elg_to_gpu +end subroutine psb_s_cuda_elg_to_gpu diff --git a/cuda/impl/psb_z_elg_trim.f90 b/cuda/impl/psb_s_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_z_elg_trim.f90 rename to cuda/impl/psb_s_cuda_elg_trim.f90 index 9bd43312..516aebc4 100644 --- a/cuda/impl/psb_z_elg_trim.f90 +++ b/cuda/impl/psb_s_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_elg_trim(a) +subroutine psb_s_cuda_elg_trim(a) use psb_base_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_trim + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_trim implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_z_elg_trim(a) return -end subroutine psb_z_elg_trim +end subroutine psb_s_cuda_elg_trim diff --git a/cuda/impl/psb_s_elg_vect_mv.F90 b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_s_elg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_elg_vect_mv.F90 index f8d297d1..dad62418 100644 --- a/cuda/impl/psb_s_elg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_vect_mv + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_vect_mv #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_elg_vect_mv' + character(len=20) :: name='s_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= szero) then @@ -128,4 +128,4 @@ subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_vect_mv +end subroutine psb_s_cuda_elg_vect_mv diff --git a/cuda/impl/psb_s_hdiag_csmv.F90 b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_s_hdiag_csmv.F90 rename to cuda/impl/psb_s_cuda_hdiag_csmv.F90 index 3320901c..8e7e4931 100644 --- a/cuda/impl/psb_s_hdiag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_csmv + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_csmv #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif implicit none - class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='s_hdiag_csmv' + character(len=20) :: name='s_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hdiag_csmv +end subroutine psb_s_cuda_hdiag_csmv diff --git a/cuda/impl/psb_s_hdiag_mold.F90 b/cuda/impl/psb_s_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_s_hdiag_mold.F90 rename to cuda/impl/psb_s_cuda_hdiag_mold.F90 index 1486b17e..e662b07b 100644 --- a/cuda/impl/psb_s_hdiag_mold.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_hdiag_mold(a,b,info) +subroutine psb_s_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_mold + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_mold implicit none - class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_hdiag_mold(a,b,info) return -end subroutine psb_s_hdiag_mold +end subroutine psb_s_cuda_hdiag_mold diff --git a/cuda/impl/psb_z_hdiag_to_gpu.F90 b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_z_hdiag_to_gpu.F90 rename to cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 index 622a0141..5fe493aa 100644 --- a/cuda/impl/psb_z_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hdiag_to_gpu(a,info) +subroutine psb_s_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_to_gpu + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_to_gpu #else - use psb_z_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_z_hdiag_to_gpu(a,info) #endif -end subroutine psb_z_hdiag_to_gpu +end subroutine psb_s_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_s_hdiag_vect_mv.F90 b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_hdiag_vect_mv.F90 rename to cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 index ac261e92..3496a637 100644 --- a/cuda/impl/psb_s_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_vect_mv + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_vect_mv #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hdiag_vect_mv' + character(len=20) :: name='s_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hdiag_vect_mv +end subroutine psb_s_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_s_hlg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_s_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 index c7e430f1..3f2765c4 100644 --- a/cuda/impl/psb_s_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_allocate_mnnz + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_allocate_mnnz #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_hlg_allocate_mnnz +end subroutine psb_s_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_s_hlg_csmm.F90 b/cuda/impl/psb_s_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_s_hlg_csmm.F90 rename to cuda/impl/psb_s_cuda_hlg_csmm.F90 index 126b17e6..2e274c22 100644 --- a/cuda/impl/psb_s_hlg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_csmm + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmm #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hlg_csmm' + character(len=20) :: name='s_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_csmm +end subroutine psb_s_cuda_hlg_csmm diff --git a/cuda/impl/psb_s_hlg_csmv.F90 b/cuda/impl/psb_s_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_s_hlg_csmv.F90 rename to cuda/impl/psb_s_cuda_hlg_csmv.F90 index 2a7f5a4d..56ea8cdb 100644 --- a/cuda/impl/psb_s_hlg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_csmv + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmv #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='s_hlg_csmv' + character(len=20) :: name='s_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_csmv +end subroutine psb_s_cuda_hlg_csmv diff --git a/cuda/impl/psb_c_hlg_from_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_c_hlg_from_gpu.F90 rename to cuda/impl/psb_s_cuda_hlg_from_gpu.F90 index 85f337ff..14ab19b7 100644 --- a/cuda/impl/psb_c_hlg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_from_gpu(a,info) +subroutine psb_s_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_from_gpu + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_from_gpu #else - use psb_c_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_c_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_c_hlg_from_gpu +end subroutine psb_s_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_s_hlg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_s_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 index d545eb02..a9f4f743 100644 --- a/cuda/impl/psb_s_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_inner_vect_sv + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_inner_vect_sv #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_inner_vect_sv +end subroutine psb_s_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_s_hlg_mold.F90 b/cuda/impl/psb_s_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_s_hlg_mold.F90 rename to cuda/impl/psb_s_cuda_hlg_mold.F90 index c5dc4774..90e9cebf 100644 --- a/cuda/impl/psb_s_hlg_mold.F90 +++ b/cuda/impl/psb_s_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_hlg_mold(a,b,info) +subroutine psb_s_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_mold + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_mold implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_s_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_hlg_mold +end subroutine psb_s_cuda_hlg_mold diff --git a/cuda/impl/psb_s_hlg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_s_hlg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 index 19cd95df..d5b9333c 100644 --- a/cuda/impl/psb_s_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_s_hlg_reallocate_nz(nz,a) +subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_reallocate_nz + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_reallocate_nz #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_hlg_reallocate_nz' + character(len=20) :: name='s_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_s_hlg_reallocate_nz(nz,a) return -end subroutine psb_s_hlg_reallocate_nz +end subroutine psb_s_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_s_hlg_scal.F90 b/cuda/impl/psb_s_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_scal.F90 rename to cuda/impl/psb_s_cuda_hlg_scal.F90 index cd389baa..e803a63d 100644 --- a/cuda/impl/psb_s_hlg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_scal(d,a,info,side) +subroutine psb_s_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scal + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scal #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_s_hlg_scal(d,a,info,side) return -end subroutine psb_s_hlg_scal +end subroutine psb_s_cuda_hlg_scal diff --git a/cuda/impl/psb_s_hlg_scals.F90 b/cuda/impl/psb_s_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_scals.F90 rename to cuda/impl/psb_s_cuda_hlg_scals.F90 index 256fac3e..eec592e1 100644 --- a/cuda/impl/psb_s_hlg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_scals(d,a,info) +subroutine psb_s_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scals + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scals #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_s_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_hlg_scals +end subroutine psb_s_cuda_hlg_scals diff --git a/cuda/impl/psb_s_hlg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_hlg_to_gpu.F90 index 139482c2..14a2a629 100644 --- a/cuda/impl/psb_s_hlg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_to_gpu + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_to_gpu #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_s_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_s_hlg_to_gpu +end subroutine psb_s_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_s_hlg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_hlg_vect_mv.F90 index 52f322aa..2b964f91 100644 --- a/cuda/impl/psb_s_hlg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_vect_mv + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_vect_mv #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hlg_vect_mv' + character(len=20) :: name='s_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_vect_mv +end subroutine psb_s_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_s_hybg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_s_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 index f2b79c77..0cf1e2bc 100644 --- a/cuda/impl/psb_s_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_allocate_mnnz + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_allocate_mnnz #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_hybg_allocate_mnnz +end subroutine psb_s_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_s_hybg_csmm.F90 b/cuda/impl/psb_s_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_s_hybg_csmm.F90 rename to cuda/impl/psb_s_cuda_hybg_csmm.F90 index 9de67633..f89df384 100644 --- a/cuda/impl/psb_s_hybg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_csmm + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmm #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_csmm' + character(len=20) :: name='s_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_csmm +end subroutine psb_s_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_s_hybg_csmv.F90 b/cuda/impl/psb_s_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_s_hybg_csmv.F90 rename to cuda/impl/psb_s_cuda_hybg_csmv.F90 index d20740a6..01642146 100644 --- a/cuda/impl/psb_s_hybg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_csmv + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmv #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_csmv' + character(len=20) :: name='s_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_csmv +end subroutine psb_s_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_s_hybg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_s_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 index 95920fc9..f0006f5c 100644 --- a/cuda/impl/psb_s_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_inner_vect_sv + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_inner_vect_sv #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_inner_vect_sv' + character(len=20) :: name='s_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= szero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_inner_vect_sv +end subroutine psb_s_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_s_hybg_mold.F90 b/cuda/impl/psb_s_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_s_hybg_mold.F90 rename to cuda/impl/psb_s_cuda_hybg_mold.F90 index 882990c0..4a1fc64e 100644 --- a/cuda/impl/psb_s_hybg_mold.F90 +++ b/cuda/impl/psb_s_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_mold(a,b,info) +subroutine psb_s_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_mold + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_mold implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_s_hybg_mold(a,b,info) return -end subroutine psb_s_hybg_mold +end subroutine psb_s_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_s_hybg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_s_hybg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 index 46079a92..7ee15f52 100644 --- a/cuda/impl/psb_s_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_reallocate_nz(nz,a) +subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_reallocate_nz + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_reallocate_nz #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_hybg_reallocate_nz' + character(len=20) :: name='s_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_s_hybg_reallocate_nz(nz,a) return -end subroutine psb_s_hybg_reallocate_nz +end subroutine psb_s_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_s_hybg_scal.F90 b/cuda/impl/psb_s_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_s_hybg_scal.F90 rename to cuda/impl/psb_s_cuda_hybg_scal.F90 index a55a8b2c..7a3978b7 100644 --- a/cuda/impl/psb_s_hybg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_scal(d,a,info,side) +subroutine psb_s_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scal + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scal #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_s_hybg_scal(d,a,info,side) return -end subroutine psb_s_hybg_scal +end subroutine psb_s_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_s_hybg_scals.F90 b/cuda/impl/psb_s_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_s_hybg_scals.F90 rename to cuda/impl/psb_s_cuda_hybg_scals.F90 index ae92166f..a19ae3f6 100644 --- a/cuda/impl/psb_s_hybg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_scals(d,a,info) +subroutine psb_s_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scals + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scals #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_s_hybg_scals(d,a,info) return -end subroutine psb_s_hybg_scals +end subroutine psb_s_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_s_hybg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_s_hybg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_hybg_to_gpu.F90 index bfb9b261..ec415176 100644 --- a/cuda/impl/psb_s_hybg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_to_gpu + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_to_gpu #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_s_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_s_hybg_to_gpu +end subroutine psb_s_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_s_hybg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_hybg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_hybg_vect_mv.F90 index 5fe102f6..a83c4561 100644 --- a/cuda/impl/psb_s_hybg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_vect_mv + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_vect_mv #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_vect_mv' + character(len=20) :: name='s_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= szero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_vect_mv +end subroutine psb_s_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_s_mv_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_csrg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 index 01c9db06..a9e297bd 100644 --- a/cuda/impl/psb_s_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_mv_csrg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_coo + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_coo #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_s_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_csrg_from_coo +end subroutine psb_s_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_s_mv_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 index 0ac28af3..54bc0ae4 100644 --- a/cuda/impl/psb_s_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_mv_csrg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_fmt + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_fmt #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_s_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_s_mv_csrg_from_fmt +end subroutine psb_s_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_s_mv_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_diag_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 index f51607e5..fda60d96 100644 --- a/cuda/impl/psb_s_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_mv_diag_from_coo(a,b,info) +subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_mv_diag_from_coo + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_mv_diag_from_coo #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif implicit none - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_s_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_diag_from_coo +end subroutine psb_s_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_s_mv_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_elg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 index ac153f6c..447e2971 100644 --- a/cuda/impl/psb_s_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_mv_elg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_coo + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_coo #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_s_mv_elg_from_coo(a,b,info) return -end subroutine psb_s_mv_elg_from_coo +end subroutine psb_s_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_s_mv_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_s_mv_elg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 index 9238544c..e88080dd 100644 --- a/cuda/impl/psb_s_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_mv_elg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_fmt + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_fmt #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_s_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_s_mv_elg_from_fmt +end subroutine psb_s_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_s_mv_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_s_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 index dcbcfe4d..f3252eb2 100644 --- a/cuda/impl/psb_s_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_s_mv_hdiag_from_coo(a,b,info) +subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif implicit none - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_s_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_s_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_s_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_hdiag_from_coo +end subroutine psb_s_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_s_mv_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_s_mv_hlg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 index dc72a135..9810a85e 100644 --- a/cuda/impl/psb_s_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_mv_hlg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_coo #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_s_mv_hlg_from_coo(a,b,info) return -end subroutine psb_s_mv_hlg_from_coo +end subroutine psb_s_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_s_mv_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 index bbe42e4a..700dc151 100644 --- a/cuda/impl/psb_s_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_mv_hlg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_fmt + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_fmt #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_s_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_s_mv_hlg_from_fmt +end subroutine psb_s_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_s_mv_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_hybg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 index 7d3197a8..ca9f34c1 100644 --- a/cuda/impl/psb_s_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_mv_hybg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_coo + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_coo #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_s_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_hybg_from_coo +end subroutine psb_s_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_s_mv_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 index 51d8a2e6..5ba606af 100644 --- a/cuda/impl/psb_s_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_mv_hybg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_fmt + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_fmt #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_s_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_s_mv_hybg_from_fmt +end subroutine psb_s_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_z_cp_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_cp_csrg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 index c3b0eebd..186190ac 100644 --- a/cuda/impl/psb_z_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_cp_csrg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_coo + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_coo #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_z_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_csrg_from_coo +end subroutine psb_z_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_z_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 index 218d6c7b..d1e1a82d 100644 --- a/cuda/impl/psb_z_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_cp_csrg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_fmt + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_fmt #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_z_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_z_cp_csrg_from_fmt +end subroutine psb_z_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_z_cp_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_cp_diag_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 index 013e88cd..c303b127 100644 --- a/cuda/impl/psb_z_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_cp_diag_from_coo(a,b,info) +subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_cp_diag_from_coo + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_cp_diag_from_coo #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif implicit none - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_z_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_diag_from_coo +end subroutine psb_z_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_z_cp_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_z_cp_elg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 index c9b61a99..4b18b89b 100644 --- a/cuda/impl/psb_z_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_z_cp_elg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_coo + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_z_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_z_count_ell_from_coo -end subroutine psb_z_cp_elg_from_coo +end subroutine psb_z_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_z_cp_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_z_cp_elg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 index 23468b8a..6fa91de6 100644 --- a/cuda/impl/psb_z_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_cp_elg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_fmt + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_fmt #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_z_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_z_cp_elg_from_fmt +end subroutine psb_z_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_z_cp_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_z_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 index b44c2854..c94d8824 100644 --- a/cuda/impl/psb_z_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_cp_hdiag_from_coo(a,b,info) +subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif implicit none - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_z_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_z_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_z_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_hdiag_from_coo +end subroutine psb_z_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_z_cp_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_z_cp_hlg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 index 51d0c8e6..1607f1b6 100644 --- a/cuda/impl/psb_z_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_cp_hlg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_coo #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_z_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_z_cp_hlg_from_coo +end subroutine psb_z_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_z_cp_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_z_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 index a6dd5970..e8c1f95d 100644 --- a/cuda/impl/psb_z_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_cp_hlg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_fmt + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_fmt #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_z_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_hlg_from_fmt +end subroutine psb_z_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_z_cp_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_cp_hybg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 index ebb6f60a..6031526a 100644 --- a/cuda/impl/psb_z_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_cp_hybg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_coo + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_coo #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_z_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_hybg_from_coo +end subroutine psb_z_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_z_cp_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 index 82f2ac65..0202ef24 100644 --- a/cuda/impl/psb_z_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_cp_hybg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_fmt + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_fmt #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_z_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_z_cp_hybg_from_fmt +end subroutine psb_z_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_z_csrg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_z_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 index 8cb2ccb1..a7533e59 100644 --- a/cuda/impl/psb_z_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_allocate_mnnz + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_allocate_mnnz #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_csrg_allocate_mnnz +end subroutine psb_z_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_z_csrg_csmm.F90 b/cuda/impl/psb_z_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_z_csrg_csmm.F90 rename to cuda/impl/psb_z_cuda_csrg_csmm.F90 index eb8a4d7f..49fb9fcf 100644 --- a/cuda/impl/psb_z_csrg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_csmm + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmm #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_csrg_csmm +end subroutine psb_z_cuda_csrg_csmm diff --git a/cuda/impl/psb_z_csrg_csmv.F90 b/cuda/impl/psb_z_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_z_csrg_csmv.F90 rename to cuda/impl/psb_z_cuda_csrg_csmv.F90 index 10546eb1..54ad6f4f 100644 --- a/cuda/impl/psb_z_csrg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_csmv + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmv #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_csrg_csmv' + character(len=20) :: name='z_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_csrg_csmv +end subroutine psb_z_cuda_csrg_csmv diff --git a/cuda/impl/psb_d_csrg_from_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_csrg_from_gpu.F90 rename to cuda/impl/psb_z_cuda_csrg_from_gpu.F90 index 9c0237f0..bb3b49d5 100644 --- a/cuda/impl/psb_d_csrg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_csrg_from_gpu(a,info) +subroutine psb_z_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_from_gpu + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_from_gpu #else - use psb_d_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_d_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_d_csrg_from_gpu +end subroutine psb_z_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_z_csrg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 index 75d6800b..6c7b1fcb 100644 --- a/cuda/impl/psb_z_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_inner_vect_sv + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_inner_vect_sv #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='z_csrg_inner_vect_sv' + character(len=20) :: name='z_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_csrg_inner_vect_sv +end subroutine psb_z_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_z_csrg_mold.F90 b/cuda/impl/psb_z_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_z_csrg_mold.F90 rename to cuda/impl/psb_z_cuda_csrg_mold.F90 index e83deb3f..23bb658a 100644 --- a/cuda/impl/psb_z_csrg_mold.F90 +++ b/cuda/impl/psb_z_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_csrg_mold(a,b,info) +subroutine psb_z_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_mold + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_mold implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_csrg_mold(a,b,info) return -end subroutine psb_z_csrg_mold +end subroutine psb_z_cuda_csrg_mold diff --git a/cuda/impl/psb_z_csrg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_z_csrg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 index c2509c22..61ae0f59 100644 --- a/cuda/impl/psb_z_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_z_csrg_reallocate_nz(nz,a) +subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_reallocate_nz + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_reallocate_nz #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_csrg_reallocate_nz' + character(len=20) :: name='z_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_z_csrg_reallocate_nz(nz,a) return -end subroutine psb_z_csrg_reallocate_nz +end subroutine psb_z_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_z_csrg_scal.F90 b/cuda/impl/psb_z_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_scal.F90 rename to cuda/impl/psb_z_cuda_csrg_scal.F90 index d8ab0ca3..a2099933 100644 --- a/cuda/impl/psb_z_csrg_scal.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_z_csrg_scal(d,a,info,side) +subroutine psb_z_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scal + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scal #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_z_csrg_scal(d,a,info,side) return -end subroutine psb_z_csrg_scal +end subroutine psb_z_cuda_csrg_scal diff --git a/cuda/impl/psb_z_csrg_scals.F90 b/cuda/impl/psb_z_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_scals.F90 rename to cuda/impl/psb_z_cuda_csrg_scals.F90 index 3d14998d..72fee99b 100644 --- a/cuda/impl/psb_z_csrg_scals.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_z_csrg_scals(d,a,info) +subroutine psb_z_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scals + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scals #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_z_csrg_scals(d,a,info) return -end subroutine psb_z_csrg_scals +end subroutine psb_z_cuda_csrg_scals diff --git a/cuda/impl/psb_z_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_z_csrg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index 4548935d..c6f217ab 100644 --- a/cuda/impl/psb_z_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_z_csrg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_to_gpu + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_to_gpu #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_z_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_z_csrg_to_gpu +end subroutine psb_z_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_z_csrg_vect_mv.F90 b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_csrg_vect_mv.F90 index 0770d448..964134fb 100644 --- a/cuda/impl/psb_z_csrg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_vect_mv + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_vect_mv #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_csrg_vect_mv' + character(len=20) :: name='z_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= zzero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_csrg_vect_mv +end subroutine psb_z_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_z_diag_csmv.F90 b/cuda/impl/psb_z_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_z_diag_csmv.F90 rename to cuda/impl/psb_z_cuda_diag_csmv.F90 index 667e1a1f..2e86f0f8 100644 --- a/cuda/impl/psb_z_diag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_csmv + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_csmv #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='z_diag_csmv' + character(len=20) :: name='z_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_diag_csmv +end subroutine psb_z_cuda_diag_csmv diff --git a/cuda/impl/psb_z_diag_mold.F90 b/cuda/impl/psb_z_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_z_diag_mold.F90 rename to cuda/impl/psb_z_cuda_diag_mold.F90 index 5cd752ce..5b11b41c 100644 --- a/cuda/impl/psb_z_diag_mold.F90 +++ b/cuda/impl/psb_z_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_diag_mold(a,b,info) +subroutine psb_z_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_mold + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_mold implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_diag_mold(a,b,info) return -end subroutine psb_z_diag_mold +end subroutine psb_z_cuda_diag_mold diff --git a/cuda/impl/psb_z_diag_to_gpu.F90 b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_diag_to_gpu.F90 rename to cuda/impl/psb_z_cuda_diag_to_gpu.F90 index 40913624..a28858b5 100644 --- a/cuda/impl/psb_z_diag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_diag_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_to_gpu + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_to_gpu #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_z_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_z_diag_to_gpu +end subroutine psb_z_cuda_diag_to_gpu diff --git a/cuda/impl/psb_z_diag_vect_mv.F90 b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_z_diag_vect_mv.F90 rename to cuda/impl/psb_z_cuda_diag_vect_mv.F90 index b8946491..12f3c3e7 100644 --- a/cuda/impl/psb_z_diag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_vect_mv + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_vect_mv #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_diag_vect_mv' + character(len=20) :: name='z_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_diag_vect_mv +end subroutine psb_z_cuda_diag_vect_mv diff --git a/cuda/impl/psb_z_dnsg_mat_impl.F90 b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_z_dnsg_mat_impl.F90 rename to cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 index 407deaa2..c2a641b6 100644 --- a/cuda/impl/psb_z_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_vect_mv + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_vect_mv #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ complex(psb_dpk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='z_dnsg_vect_mv' + character(len=20) :: name='z_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= zzero) then @@ -117,21 +117,21 @@ subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_dnsg_vect_mv +end subroutine psb_z_cuda_dnsg_vect_mv -subroutine psb_z_dnsg_mold(a,b,info) +subroutine psb_z_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_mold + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_mold #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_z_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_z_dnsg_mold(a,b,info) return -end subroutine psb_z_dnsg_mold +end subroutine psb_z_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_z_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_z_dnsg_to_gpu(a,info) +subroutine psb_z_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_to_gpu + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_to_gpu #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='z_dnsg_to_gpu' + character(len=20) :: name='z_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_z_dnsg_to_gpu(a,info) return -end subroutine psb_z_dnsg_to_gpu +end subroutine psb_z_cuda_dnsg_to_gpu -subroutine psb_z_cp_dnsg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_coo + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_coo #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_dnsg_cp_from_coo' + character(len=20) :: name='z_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_z_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_z_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_z_cp_dnsg_from_coo +end subroutine psb_z_cuda_cp_dnsg_from_coo -subroutine psb_z_cp_dnsg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_fmt + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_fmt #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_z_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_dnsg_cp_from_fmt' + character(len=20) :: name='z_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_z_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_z_cp_dnsg_from_fmt +end subroutine psb_z_cuda_cp_dnsg_from_fmt -subroutine psb_z_mv_dnsg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_coo + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_coo #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='z_dnsg_mv_from_coo' + character(len=20) :: name='z_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_z_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_z_mv_dnsg_from_coo +end subroutine psb_z_cuda_mv_dnsg_from_coo -subroutine psb_z_mv_dnsg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_fmt + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_fmt #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_z_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_dnsg_cp_from_fmt' + character(len=20) :: name='z_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_z_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_z_mv_dnsg_from_fmt +end subroutine psb_z_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_z_elg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_z_elg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 index 39d14dd2..a2e36feb 100644 --- a/cuda/impl/psb_z_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_allocate_mnnz + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_allocate_mnnz #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_elg_allocate_mnnz +end subroutine psb_z_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_z_elg_asb.f90 b/cuda/impl/psb_z_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_z_elg_asb.f90 rename to cuda/impl/psb_z_cuda_elg_asb.f90 index 515f579a..511183f5 100644 --- a/cuda/impl/psb_z_elg_asb.f90 +++ b/cuda/impl/psb_z_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_z_elg_asb(a) +subroutine psb_z_cuda_elg_asb(a) use psb_base_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_asb + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_asb implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_z_elg_asb(a) return -end subroutine psb_z_elg_asb +end subroutine psb_z_cuda_elg_asb diff --git a/cuda/impl/psb_z_elg_csmm.F90 b/cuda/impl/psb_z_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_z_elg_csmm.F90 rename to cuda/impl/psb_z_cuda_elg_csmm.F90 index aa27419c..d4034b65 100644 --- a/cuda/impl/psb_z_elg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csmm + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmm #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_elg_csmm' + character(len=20) :: name='z_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_csmm +end subroutine psb_z_cuda_elg_csmm diff --git a/cuda/impl/psb_z_elg_csmv.F90 b/cuda/impl/psb_z_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_z_elg_csmv.F90 rename to cuda/impl/psb_z_cuda_elg_csmv.F90 index 46bc9615..eba12d16 100644 --- a/cuda/impl/psb_z_elg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csmv + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmv #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_csmv +end subroutine psb_z_cuda_elg_csmv diff --git a/cuda/impl/psb_z_elg_csput.F90 b/cuda/impl/psb_z_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_z_elg_csput.F90 rename to cuda/impl/psb_z_cuda_elg_csput.F90 index 51080f5b..5a52f4f8 100644 --- a/cuda/impl/psb_z_elg_csput.F90 +++ b/cuda/impl/psb_z_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csput_a + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_a #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='z_elg_csput_a' + character(len=20) :: name='z_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_z_elg_csput_a +end subroutine psb_z_cuda_elg_csput_a -subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csput_v - use psb_z_gpu_vect_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_v + use psb_z_cuda_vect_mod #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='z_elg_csput_v' + character(len=20) :: name='z_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_z_vect_gpu) + class is (psb_z_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_z_elg_csput_v +end subroutine psb_z_cuda_elg_csput_v diff --git a/cuda/impl/psb_z_elg_from_gpu.F90 b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_elg_from_gpu.F90 rename to cuda/impl/psb_z_cuda_elg_from_gpu.F90 index e8670cd4..ffed4349 100644 --- a/cuda/impl/psb_z_elg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_from_gpu(a,info) +subroutine psb_z_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_from_gpu + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_from_gpu #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_z_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_z_elg_from_gpu +end subroutine psb_z_cuda_elg_from_gpu diff --git a/cuda/impl/psb_z_elg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_z_elg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 index 66d7eed8..7564d5dd 100644 --- a/cuda/impl/psb_z_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_inner_vect_sv + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_inner_vect_sv #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='z_elg_inner_vect_sv' + character(len=20) :: name='z_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. complex(psb_dpk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_inner_vect_sv +end subroutine psb_z_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_z_elg_mold.F90 b/cuda/impl/psb_z_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_z_elg_mold.F90 rename to cuda/impl/psb_z_cuda_elg_mold.F90 index 1a5ebe54..e027c9f2 100644 --- a/cuda/impl/psb_z_elg_mold.F90 +++ b/cuda/impl/psb_z_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_elg_mold(a,b,info) +subroutine psb_z_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_mold + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_mold implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_elg_mold(a,b,info) return -end subroutine psb_z_elg_mold +end subroutine psb_z_cuda_elg_mold diff --git a/cuda/impl/psb_z_elg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_z_elg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 index f6bc194f..16cebe70 100644 --- a/cuda/impl/psb_z_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_z_elg_reallocate_nz(nz,a) +subroutine psb_z_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_reallocate_nz + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_reallocate_nz #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_elg_reallocate_nz' + character(len=20) :: name='z_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_z_elg_reallocate_nz(nz,a) return -end subroutine psb_z_elg_reallocate_nz +end subroutine psb_z_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_z_elg_scal.F90 b/cuda/impl/psb_z_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_z_elg_scal.F90 rename to cuda/impl/psb_z_cuda_elg_scal.F90 index eed9007a..4802aaaa 100644 --- a/cuda/impl/psb_z_elg_scal.F90 +++ b/cuda/impl/psb_z_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_scal(d,a,info,side) +subroutine psb_z_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scal + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scal #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_z_elg_scal(d,a,info,side) return -end subroutine psb_z_elg_scal +end subroutine psb_z_cuda_elg_scal diff --git a/cuda/impl/psb_z_elg_scals.F90 b/cuda/impl/psb_z_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_z_elg_scals.F90 rename to cuda/impl/psb_z_cuda_elg_scals.F90 index 1e3f3682..5db823da 100644 --- a/cuda/impl/psb_z_elg_scals.F90 +++ b/cuda/impl/psb_z_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_scals(d,a,info) +subroutine psb_z_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scals + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scals #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_z_elg_scals(d,a,info) return -end subroutine psb_z_elg_scals +end subroutine psb_z_cuda_elg_scals diff --git a/cuda/impl/psb_z_elg_to_gpu.F90 b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_z_elg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_elg_to_gpu.F90 index 71a5ec66..6d86bdd9 100644 --- a/cuda/impl/psb_z_elg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_to_gpu + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_to_gpu #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_z_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_z_elg_to_gpu +end subroutine psb_z_cuda_elg_to_gpu diff --git a/cuda/impl/psb_c_elg_trim.f90 b/cuda/impl/psb_z_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_c_elg_trim.f90 rename to cuda/impl/psb_z_cuda_elg_trim.f90 index bc0c0696..3d261150 100644 --- a/cuda/impl/psb_c_elg_trim.f90 +++ b/cuda/impl/psb_z_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_elg_trim(a) +subroutine psb_z_cuda_elg_trim(a) use psb_base_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_trim + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_trim implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_c_elg_trim(a) return -end subroutine psb_c_elg_trim +end subroutine psb_z_cuda_elg_trim diff --git a/cuda/impl/psb_z_elg_vect_mv.F90 b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_z_elg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_elg_vect_mv.F90 index 5cd72e44..4bd1b3ed 100644 --- a/cuda/impl/psb_z_elg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_vect_mv + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_vect_mv #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_elg_vect_mv' + character(len=20) :: name='z_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= zzero) then @@ -128,4 +128,4 @@ subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_vect_mv +end subroutine psb_z_cuda_elg_vect_mv diff --git a/cuda/impl/psb_z_hdiag_csmv.F90 b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_z_hdiag_csmv.F90 rename to cuda/impl/psb_z_cuda_hdiag_csmv.F90 index baf730a2..8be14704 100644 --- a/cuda/impl/psb_z_hdiag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_csmv + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_csmv #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif implicit none - class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='z_hdiag_csmv' + character(len=20) :: name='z_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hdiag_csmv +end subroutine psb_z_cuda_hdiag_csmv diff --git a/cuda/impl/psb_z_hdiag_mold.F90 b/cuda/impl/psb_z_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_z_hdiag_mold.F90 rename to cuda/impl/psb_z_cuda_hdiag_mold.F90 index b656ed0f..33fdd8eb 100644 --- a/cuda/impl/psb_z_hdiag_mold.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_hdiag_mold(a,b,info) +subroutine psb_z_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_mold + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_mold implicit none - class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_hdiag_mold(a,b,info) return -end subroutine psb_z_hdiag_mold +end subroutine psb_z_cuda_hdiag_mold diff --git a/cuda/impl/psb_d_hdiag_to_gpu.F90 b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_d_hdiag_to_gpu.F90 rename to cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 index fb013586..47126aca 100644 --- a/cuda/impl/psb_d_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hdiag_to_gpu(a,info) +subroutine psb_z_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_to_gpu + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_to_gpu #else - use psb_d_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_d_hdiag_to_gpu(a,info) #endif -end subroutine psb_d_hdiag_to_gpu +end subroutine psb_z_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_z_hdiag_vect_mv.F90 b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_z_hdiag_vect_mv.F90 rename to cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 index 3e1c859e..cf0b3457 100644 --- a/cuda/impl/psb_z_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_vect_mv + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_vect_mv #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hdiag_vect_mv' + character(len=20) :: name='z_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hdiag_vect_mv +end subroutine psb_z_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_z_hlg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_z_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 index e3c05ec1..228244f1 100644 --- a/cuda/impl/psb_z_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_allocate_mnnz + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_allocate_mnnz #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_hlg_allocate_mnnz +end subroutine psb_z_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_z_hlg_csmm.F90 b/cuda/impl/psb_z_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_z_hlg_csmm.F90 rename to cuda/impl/psb_z_cuda_hlg_csmm.F90 index 3432c177..325ab0d0 100644 --- a/cuda/impl/psb_z_hlg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_csmm + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmm #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hlg_csmm' + character(len=20) :: name='z_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_csmm +end subroutine psb_z_cuda_hlg_csmm diff --git a/cuda/impl/psb_z_hlg_csmv.F90 b/cuda/impl/psb_z_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_z_hlg_csmv.F90 rename to cuda/impl/psb_z_cuda_hlg_csmv.F90 index b9b79e0c..ac84190e 100644 --- a/cuda/impl/psb_z_hlg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_csmv + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmv #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='z_hlg_csmv' + character(len=20) :: name='z_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_csmv +end subroutine psb_z_cuda_hlg_csmv diff --git a/cuda/impl/psb_z_hlg_from_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_z_hlg_from_gpu.F90 rename to cuda/impl/psb_z_cuda_hlg_from_gpu.F90 index f582e506..4db6c3ce 100644 --- a/cuda/impl/psb_z_hlg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_from_gpu(a,info) +subroutine psb_z_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_from_gpu + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_from_gpu #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_z_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_z_hlg_from_gpu +end subroutine psb_z_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_z_hlg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_z_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 index 5a7b1031..f99a5a9e 100644 --- a/cuda/impl/psb_z_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_inner_vect_sv + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_inner_vect_sv #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_inner_vect_sv +end subroutine psb_z_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_z_hlg_mold.F90 b/cuda/impl/psb_z_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_z_hlg_mold.F90 rename to cuda/impl/psb_z_cuda_hlg_mold.F90 index f9ff0c7a..cc9ad510 100644 --- a/cuda/impl/psb_z_hlg_mold.F90 +++ b/cuda/impl/psb_z_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_hlg_mold(a,b,info) +subroutine psb_z_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_mold + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_mold implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_z_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_hlg_mold +end subroutine psb_z_cuda_hlg_mold diff --git a/cuda/impl/psb_z_hlg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_z_hlg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 index f3d50626..aaba9be5 100644 --- a/cuda/impl/psb_z_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_z_hlg_reallocate_nz(nz,a) +subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_reallocate_nz + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_reallocate_nz #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_hlg_reallocate_nz' + character(len=20) :: name='z_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_z_hlg_reallocate_nz(nz,a) return -end subroutine psb_z_hlg_reallocate_nz +end subroutine psb_z_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_z_hlg_scal.F90 b/cuda/impl/psb_z_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_scal.F90 rename to cuda/impl/psb_z_cuda_hlg_scal.F90 index 8aa85500..3ffda36a 100644 --- a/cuda/impl/psb_z_hlg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_scal(d,a,info,side) +subroutine psb_z_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scal + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scal #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_z_hlg_scal(d,a,info,side) return -end subroutine psb_z_hlg_scal +end subroutine psb_z_cuda_hlg_scal diff --git a/cuda/impl/psb_z_hlg_scals.F90 b/cuda/impl/psb_z_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_scals.F90 rename to cuda/impl/psb_z_cuda_hlg_scals.F90 index d5689c06..bae50c7c 100644 --- a/cuda/impl/psb_z_hlg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_scals(d,a,info) +subroutine psb_z_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scals + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scals #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_z_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_hlg_scals +end subroutine psb_z_cuda_hlg_scals diff --git a/cuda/impl/psb_z_hlg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_hlg_to_gpu.F90 index d63aee9c..93c9f043 100644 --- a/cuda/impl/psb_z_hlg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_to_gpu + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_to_gpu #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_z_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_z_hlg_to_gpu +end subroutine psb_z_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_z_hlg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_hlg_vect_mv.F90 index 9efefc0a..f377efec 100644 --- a/cuda/impl/psb_z_hlg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_vect_mv + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_vect_mv #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hlg_vect_mv' + character(len=20) :: name='z_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_vect_mv +end subroutine psb_z_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_z_hybg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_z_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 index 2c38c536..0c6f9aa9 100644 --- a/cuda/impl/psb_z_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_allocate_mnnz + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_allocate_mnnz #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_hybg_allocate_mnnz +end subroutine psb_z_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_z_hybg_csmm.F90 b/cuda/impl/psb_z_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_z_hybg_csmm.F90 rename to cuda/impl/psb_z_cuda_hybg_csmm.F90 index 5ec9701b..d4a32420 100644 --- a/cuda/impl/psb_z_hybg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_csmm + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmm #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_csmm' + character(len=20) :: name='z_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_csmm +end subroutine psb_z_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_z_hybg_csmv.F90 b/cuda/impl/psb_z_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_z_hybg_csmv.F90 rename to cuda/impl/psb_z_cuda_hybg_csmv.F90 index e7f39cb6..180a8ae1 100644 --- a/cuda/impl/psb_z_hybg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_csmv + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmv #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_csmv' + character(len=20) :: name='z_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_csmv +end subroutine psb_z_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_z_hybg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_z_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 index 35c55ec7..1df47788 100644 --- a/cuda/impl/psb_z_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_inner_vect_sv + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_inner_vect_sv #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_inner_vect_sv' + character(len=20) :: name='z_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= zzero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_inner_vect_sv +end subroutine psb_z_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_z_hybg_mold.F90 b/cuda/impl/psb_z_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_z_hybg_mold.F90 rename to cuda/impl/psb_z_cuda_hybg_mold.F90 index 3a17dbd2..5a13ff19 100644 --- a/cuda/impl/psb_z_hybg_mold.F90 +++ b/cuda/impl/psb_z_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_mold(a,b,info) +subroutine psb_z_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_mold + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_mold implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_z_hybg_mold(a,b,info) return -end subroutine psb_z_hybg_mold +end subroutine psb_z_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_z_hybg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_z_hybg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 index 79d81911..5278ba35 100644 --- a/cuda/impl/psb_z_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_reallocate_nz(nz,a) +subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_reallocate_nz + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_reallocate_nz #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_hybg_reallocate_nz' + character(len=20) :: name='z_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_z_hybg_reallocate_nz(nz,a) return -end subroutine psb_z_hybg_reallocate_nz +end subroutine psb_z_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_z_hybg_scal.F90 b/cuda/impl/psb_z_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_z_hybg_scal.F90 rename to cuda/impl/psb_z_cuda_hybg_scal.F90 index c8179bf2..cd436e76 100644 --- a/cuda/impl/psb_z_hybg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_scal(d,a,info,side) +subroutine psb_z_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scal + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scal #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_z_hybg_scal(d,a,info,side) return -end subroutine psb_z_hybg_scal +end subroutine psb_z_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_z_hybg_scals.F90 b/cuda/impl/psb_z_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_z_hybg_scals.F90 rename to cuda/impl/psb_z_cuda_hybg_scals.F90 index 3729412d..0a9ee79d 100644 --- a/cuda/impl/psb_z_hybg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_scals(d,a,info) +subroutine psb_z_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scals + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scals #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_z_hybg_scals(d,a,info) return -end subroutine psb_z_hybg_scals +end subroutine psb_z_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_z_hybg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_z_hybg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_hybg_to_gpu.F90 index 4a2a9b1c..107b5049 100644 --- a/cuda/impl/psb_z_hybg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_to_gpu + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_to_gpu #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_z_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_z_hybg_to_gpu +end subroutine psb_z_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_z_hybg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_z_hybg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_hybg_vect_mv.F90 index f3b6695e..22751f2d 100644 --- a/cuda/impl/psb_z_hybg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_vect_mv + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_vect_mv #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_vect_mv' + character(len=20) :: name='z_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= zzero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_vect_mv +end subroutine psb_z_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_z_mv_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_csrg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 index 21771b89..d5390ee3 100644 --- a/cuda/impl/psb_z_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_mv_csrg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_coo + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_coo #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_z_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_csrg_from_coo +end subroutine psb_z_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_z_mv_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 index 31408214..e2bfdb73 100644 --- a/cuda/impl/psb_z_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_mv_csrg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_fmt + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_fmt #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_z_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_z_mv_csrg_from_fmt +end subroutine psb_z_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_z_mv_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_diag_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 index 8872c890..b61813bf 100644 --- a/cuda/impl/psb_z_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_mv_diag_from_coo(a,b,info) +subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_mv_diag_from_coo + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_mv_diag_from_coo #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif implicit none - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_z_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_diag_from_coo +end subroutine psb_z_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_z_mv_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_elg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 index 2d78edc6..e3ff4036 100644 --- a/cuda/impl/psb_z_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_mv_elg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_coo + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_coo #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_z_mv_elg_from_coo(a,b,info) return -end subroutine psb_z_mv_elg_from_coo +end subroutine psb_z_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_z_mv_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_z_mv_elg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 index 3bf663b3..07a80173 100644 --- a/cuda/impl/psb_z_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_mv_elg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_fmt + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_fmt #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_z_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_z_mv_elg_from_fmt +end subroutine psb_z_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_z_mv_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_z_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 index e1df9cc4..f25e6370 100644 --- a/cuda/impl/psb_z_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_z_mv_hdiag_from_coo(a,b,info) +subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif implicit none - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_z_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_z_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_z_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_hdiag_from_coo +end subroutine psb_z_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_z_mv_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_z_mv_hlg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 index ce037be2..3bc630de 100644 --- a/cuda/impl/psb_z_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_mv_hlg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_coo #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_z_mv_hlg_from_coo(a,b,info) return -end subroutine psb_z_mv_hlg_from_coo +end subroutine psb_z_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_z_mv_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 index 4ea1b385..d746a341 100644 --- a/cuda/impl/psb_z_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_mv_hlg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_fmt + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_fmt #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_z_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_z_mv_hlg_from_fmt +end subroutine psb_z_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_z_mv_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_hybg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 index 3424caea..7d0d9eec 100644 --- a/cuda/impl/psb_z_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_mv_hybg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_coo + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_coo #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_z_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_hybg_from_coo +end subroutine psb_z_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_z_mv_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 index 90c35897..7bfc27e3 100644 --- a/cuda/impl/psb_z_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_mv_hybg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_fmt + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_fmt #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_z_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_z_mv_hybg_from_fmt +end subroutine psb_z_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/psb_c_csrg_mat_mod.F90 b/cuda/psb_c_csrg_mat_mod.F90 deleted file mode 100644 index 203a6dbf..00000000 --- a/cuda/psb_c_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_c_csrg_mat_mod - - use iso_c_binding - use psb_c_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_c_csr_sparse_mat) :: psb_c_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(c_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => c_csrg_get_fmt - procedure, pass(a) :: sizeof => c_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_c_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_c_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_csrg_csmm - procedure, pass(a) :: csmv => psb_c_csrg_csmv - procedure, pass(a) :: scals => psb_c_csrg_scals - procedure, pass(a) :: scalv => psb_c_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_c_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_csrg_from_fmt - procedure, pass(a) :: free => c_csrg_free - procedure, pass(a) :: mold => psb_c_csrg_mold - procedure, pass(a) :: is_host => c_csrg_is_host - procedure, pass(a) :: is_dev => c_csrg_is_dev - procedure, pass(a) :: is_sync => c_csrg_is_sync - procedure, pass(a) :: set_host => c_csrg_set_host - procedure, pass(a) :: set_dev => c_csrg_set_dev - procedure, pass(a) :: set_sync => c_csrg_set_sync - procedure, pass(a) :: sync => c_csrg_sync - procedure, pass(a) :: to_gpu => psb_c_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_c_csrg_from_gpu - final :: c_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_csrg_mold -#endif - end type psb_c_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: c_csrg_get_nzeros, c_csrg_free, c_csrg_get_fmt, & - & c_csrg_get_size, c_csrg_sizeof, c_csrg_get_nz_row - - - interface - subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_vect_mv - end interface - - interface - subroutine psb_c_csrg_reallocate_nz(nz,a) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_c_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_c_csrg_reallocate_nz - end interface - - interface - subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_c_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_csrg_allocate_mnnz - end interface - - interface - subroutine psb_c_csrg_mold(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_mold - end interface - - interface - subroutine psb_c_csrg_to_gpu(a,info, nzrm) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_csrg_to_gpu - end interface - - interface - subroutine psb_c_csrg_from_gpu(a,info) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_from_gpu - end interface - - interface - subroutine psb_c_cp_csrg_from_coo(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_csrg_from_coo - end interface - - interface - subroutine psb_c_cp_csrg_from_fmt(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_csrg_from_fmt - end interface - - interface - subroutine psb_c_mv_csrg_from_coo(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_csrg_from_coo - end interface - - interface - subroutine psb_c_mv_csrg_from_fmt(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_csrg_from_fmt - end interface - - interface - subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:) - complex(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_csmv - end interface - interface - subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_csmm - end interface - - interface - subroutine psb_c_csrg_scal(d,a,info,side) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - complex(psb_spk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_c_csrg_scal - end interface - - interface - subroutine psb_c_csrg_scals(d,a,info) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - complex(psb_spk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function c_csrg_sizeof(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + (2*psb_sizeof_sp) * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function c_csrg_sizeof - - function c_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function c_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine c_csrg_set_host(a) - implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine c_csrg_set_host - - subroutine c_csrg_set_dev(a) - implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine c_csrg_set_dev - - subroutine c_csrg_set_sync(a) - implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine c_csrg_set_sync - - function c_csrg_is_dev(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function c_csrg_is_dev - - function c_csrg_is_host(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function c_csrg_is_host - - function c_csrg_is_sync(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function c_csrg_is_sync - - - subroutine c_csrg_sync(a) - implicit none - class(psb_c_csrg_sparse_mat), target, intent(in) :: a - class(psb_c_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine c_csrg_sync - - subroutine c_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_c_csr_sparse_mat%free() - - return - - end subroutine c_csrg_free - - subroutine c_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_c_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine c_csrg_finalize - -#else - interface - subroutine psb_c_csrg_mold(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_mold - end interface - -#endif - -end module psb_c_csrg_mat_mod diff --git a/cuda/psb_c_cuda_csrg_mat_mod.F90 b/cuda/psb_c_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..a98d7e99 --- /dev/null +++ b/cuda/psb_c_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_csrg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(c_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_c_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_c_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => c_cuda_csrg_free + procedure, pass(a) :: mold => psb_c_cuda_csrg_mold + procedure, pass(a) :: is_host => c_cuda_csrg_is_host + procedure, pass(a) :: is_dev => c_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => c_cuda_csrg_is_sync + procedure, pass(a) :: set_host => c_cuda_csrg_set_host + procedure, pass(a) :: set_dev => c_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => c_cuda_csrg_set_sync + procedure, pass(a) :: sync => c_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_c_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_c_cuda_csrg_from_gpu + final :: c_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_cuda_csrg_mold +#endif + end type psb_c_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_cuda_csrg_get_nzeros, c_cuda_csrg_free, c_cuda_csrg_get_fmt, & + & c_cuda_csrg_get_size, c_cuda_csrg_sizeof, c_cuda_csrg_get_nz_row + + + interface + subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_csrg_mold(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_mold + end interface + + interface + subroutine psb_c_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_c_cuda_csrg_from_gpu(a,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_csmv + end interface + interface + subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_csmm + end interface + + interface + subroutine psb_c_cuda_csrg_scal(d,a,info,side) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_csrg_scal + end interface + + interface + subroutine psb_c_cuda_csrg_scals(d,a,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_csrg_sizeof + + function c_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function c_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine c_cuda_csrg_set_host(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_cuda_csrg_set_host + + subroutine c_cuda_csrg_set_dev(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_cuda_csrg_set_dev + + subroutine c_cuda_csrg_set_sync(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_cuda_csrg_set_sync + + function c_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_cuda_csrg_is_dev + + function c_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_cuda_csrg_is_host + + function c_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_cuda_csrg_is_sync + + + subroutine c_cuda_csrg_sync(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_cuda_csrg_sync + + subroutine c_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_cuda_csrg_free + + subroutine c_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine c_cuda_csrg_finalize + +#else + interface + subroutine psb_c_cuda_csrg_mold(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_mold + end interface + +#endif + +end module psb_c_cuda_csrg_mat_mod diff --git a/cuda/psb_c_diag_mat_mod.F90 b/cuda/psb_c_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_c_diag_mat_mod.F90 rename to cuda/psb_c_cuda_diag_mat_mod.F90 index a7ab2fbb..1d5db05b 100644 --- a/cuda/psb_c_diag_mat_mod.F90 +++ b/cuda/psb_c_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_c_diag_mat_mod +module psb_c_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_c_dia_mat_mod - type, extends(psb_c_dia_sparse_mat) :: psb_c_diag_sparse_mat + type, extends(psb_c_dia_sparse_mat) :: psb_c_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_c_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => c_diag_get_fmt - procedure, pass(a) :: sizeof => c_diag_sizeof - procedure, pass(a) :: vect_mv => psb_c_diag_vect_mv -! procedure, pass(a) :: csmm => psb_c_diag_csmm - procedure, pass(a) :: csmv => psb_c_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_c_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_c_diag_scals -! procedure, pass(a) :: scalv => psb_c_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_c_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_c_diag_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => c_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_c_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_c_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_c_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_c_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_c_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_c_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_c_mv_diag_from_fmt - procedure, pass(a) :: free => c_diag_free - procedure, pass(a) :: mold => psb_c_diag_mold - procedure, pass(a) :: to_gpu => psb_c_diag_to_gpu - final :: c_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => c_cuda_diag_free + procedure, pass(a) :: mold => psb_c_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_diag_to_gpu + final :: c_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_c_diag_mold + procedure, pass(a) :: mold => psb_c_cuda_diag_mold #endif - end type psb_c_diag_sparse_mat + end type psb_c_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: c_diag_get_nzeros, c_diag_free, c_diag_get_fmt, & - & c_diag_get_size, c_diag_sizeof, c_diag_get_nz_row + private :: c_cuda_diag_get_nzeros, c_cuda_diag_free, c_cuda_diag_get_fmt, & + & c_cuda_diag_get_size, c_cuda_diag_sizeof, c_cuda_diag_get_nz_row interface - subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_vect_mv + end subroutine psb_c_cuda_diag_vect_mv end interface interface - subroutine psb_c_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_inner_vect_sv + end subroutine psb_c_cuda_diag_inner_vect_sv end interface interface - subroutine psb_c_diag_reallocate_nz(nz,a) - import :: psb_c_diag_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_diag_reallocate_nz(nz,a) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_diag_sparse_mat), intent(inout) :: a - end subroutine psb_c_diag_reallocate_nz + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_diag_reallocate_nz end interface interface - subroutine psb_c_diag_allocate_mnnz(m,n,a,nz) - import :: psb_c_diag_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_diag_allocate_mnnz + end subroutine psb_c_cuda_diag_allocate_mnnz end interface interface - subroutine psb_c_diag_mold(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_mold(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_diag_mold + end subroutine psb_c_cuda_diag_mold end interface interface - subroutine psb_c_diag_to_gpu(a,info, nzrm) - import :: psb_c_diag_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_diag_to_gpu + end subroutine psb_c_cuda_diag_to_gpu end interface interface - subroutine psb_c_cp_diag_from_coo(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_diag_from_coo + end subroutine psb_c_cuda_cp_diag_from_coo end interface interface - subroutine psb_c_cp_diag_from_fmt(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_diag_from_fmt + end subroutine psb_c_cuda_cp_diag_from_fmt end interface interface - subroutine psb_c_mv_diag_from_coo(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_diag_from_coo + end subroutine psb_c_cuda_mv_diag_from_coo end interface interface - subroutine psb_c_mv_diag_from_fmt(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_diag_from_fmt + end subroutine psb_c_cuda_mv_diag_from_fmt end interface interface - subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_csmv + end subroutine psb_c_cuda_diag_csmv end interface interface - subroutine psb_c_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_csmm + end subroutine psb_c_cuda_diag_csmm end interface interface - subroutine psb_c_diag_scal(d,a,info, side) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_diag_scal(d,a,info, side) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_diag_scal + end subroutine psb_c_cuda_diag_scal end interface interface - subroutine psb_c_diag_scals(d,a,info) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_diag_scals(d,a,info) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_diag_scals + end subroutine psb_c_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function c_diag_sizeof(a) result(res) + function c_cuda_diag_sizeof(a) result(res) implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_diag_sizeof + end function c_cuda_diag_sizeof - function c_diag_get_fmt() result(res) + function c_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function c_diag_get_fmt + end function c_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine c_diag_free(a) + subroutine c_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine c_diag_free + end subroutine c_cuda_diag_free - subroutine c_diag_finalize(a) + subroutine c_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_c_diag_sparse_mat), intent(inout) :: a + type(psb_c_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_diag_finalize + end subroutine c_cuda_diag_finalize #else interface - subroutine psb_c_diag_mold(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_mold(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_diag_mold + end subroutine psb_c_cuda_diag_mold end interface #endif -end module psb_c_diag_mat_mod +end module psb_c_cuda_diag_mat_mod diff --git a/cuda/psb_c_dnsg_mat_mod.F90 b/cuda/psb_c_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_c_dnsg_mat_mod.F90 rename to cuda/psb_c_cuda_dnsg_mat_mod.F90 index 7fe5fdda..e89e117b 100644 --- a/cuda/psb_c_dnsg_mat_mod.F90 +++ b/cuda/psb_c_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_c_dnsg_mat_mod +module psb_c_cuda_dnsg_mat_mod use iso_c_binding use psb_c_mat_mod use psb_c_dns_mat_mod use dnsdev_mod - type, extends(psb_c_dns_sparse_mat) :: psb_c_dnsg_sparse_mat + type, extends(psb_c_dns_sparse_mat) :: psb_c_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_c_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => c_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => c_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_c_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_c_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_c_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_c_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_c_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_c_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_c_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_c_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_c_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_c_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_c_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_c_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_c_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_c_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_c_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_c_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_dnsg_from_fmt - procedure, pass(a) :: free => c_dnsg_free - procedure, pass(a) :: mold => psb_c_dnsg_mold - procedure, pass(a) :: to_gpu => psb_c_dnsg_to_gpu - final :: c_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => c_cuda_dnsg_free + procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_dnsg_to_gpu + final :: c_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_c_dnsg_mold + procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold #endif - end type psb_c_dnsg_sparse_mat + end type psb_c_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: c_dnsg_get_nzeros, c_dnsg_free, c_dnsg_get_fmt, & - & c_dnsg_get_size, c_dnsg_get_nz_row + private :: c_cuda_dnsg_get_nzeros, c_cuda_dnsg_free, c_cuda_dnsg_get_fmt, & + & c_cuda_dnsg_get_size, c_cuda_dnsg_get_nz_row interface - subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_dnsg_vect_mv + end subroutine psb_c_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_c_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_c_dnsg_mold(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_dnsg_mold(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_dnsg_mold + end subroutine psb_c_cuda_dnsg_mold end interface interface - subroutine psb_c_dnsg_to_gpu(a,info) - import :: psb_c_dnsg_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_dnsg_to_gpu(a,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_dnsg_to_gpu + end subroutine psb_c_cuda_dnsg_to_gpu end interface interface - subroutine psb_c_cp_dnsg_from_coo(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_dnsg_from_coo + end subroutine psb_c_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_c_cp_dnsg_from_fmt(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_dnsg_from_fmt + end subroutine psb_c_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_c_mv_dnsg_from_coo(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_dnsg_from_coo + end subroutine psb_c_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_c_mv_dnsg_from_fmt(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_dnsg_from_fmt + end subroutine psb_c_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_c_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) !!$ complex(psb_spk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_csmv +!!$ end subroutine psb_c_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_c_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_csmm +!!$ end subroutine psb_c_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_scal(d,a,info, side) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_c_dnsg_scal +!!$ end subroutine psb_c_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_scals(d,a,info) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_dnsg_scals +!!$ end subroutine psb_c_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function c_dnsg_get_fmt() result(res) + function c_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function c_dnsg_get_fmt + end function c_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine c_dnsg_free(a) + subroutine c_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine c_dnsg_free + end subroutine c_cuda_dnsg_free - subroutine c_dnsg_finalize(a) + subroutine c_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_c_dnsg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_dnsg_finalize + end subroutine c_cuda_dnsg_finalize #else interface - subroutine psb_c_dnsg_mold(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_dnsg_mold(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_dnsg_mold + end subroutine psb_c_cuda_dnsg_mold end interface #endif -end module psb_c_dnsg_mat_mod +end module psb_c_cuda_dnsg_mat_mod diff --git a/cuda/psb_c_elg_mat_mod.F90 b/cuda/psb_c_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_c_elg_mat_mod.F90 rename to cuda/psb_c_cuda_elg_mat_mod.F90 index 83355b9d..43250ce3 100644 --- a/cuda/psb_c_elg_mat_mod.F90 +++ b/cuda/psb_c_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_c_elg_mat_mod +module psb_c_cuda_elg_mat_mod use iso_c_binding use psb_c_mat_mod use psb_c_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_ell_sparse_mat) :: psb_c_elg_sparse_mat + type, extends(psb_c_ell_sparse_mat) :: psb_c_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_c_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => c_elg_get_fmt - procedure, pass(a) :: sizeof => c_elg_sizeof - procedure, pass(a) :: vect_mv => psb_c_elg_vect_mv - procedure, pass(a) :: csmm => psb_c_elg_csmm - procedure, pass(a) :: csmv => psb_c_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_c_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_c_elg_scals - procedure, pass(a) :: scalv => psb_c_elg_scal - procedure, pass(a) :: reallocate_nz => psb_c_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_elg_allocate_mnnz - procedure, pass(a) :: reinit => c_elg_reinit + procedure, nopass :: get_fmt => c_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_elg_scals + procedure, pass(a) :: scalv => psb_c_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => c_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_elg_from_fmt - procedure, pass(a) :: free => c_elg_free - procedure, pass(a) :: mold => psb_c_elg_mold - procedure, pass(a) :: csput_a => psb_c_elg_csput_a - procedure, pass(a) :: csput_v => psb_c_elg_csput_v - procedure, pass(a) :: is_host => c_elg_is_host - procedure, pass(a) :: is_dev => c_elg_is_dev - procedure, pass(a) :: is_sync => c_elg_is_sync - procedure, pass(a) :: set_host => c_elg_set_host - procedure, pass(a) :: set_dev => c_elg_set_dev - procedure, pass(a) :: set_sync => c_elg_set_sync - procedure, pass(a) :: sync => c_elg_sync - procedure, pass(a) :: from_gpu => psb_c_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_c_elg_to_gpu - procedure, pass(a) :: asb => psb_c_elg_asb - final :: c_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => c_cuda_elg_free + procedure, pass(a) :: mold => psb_c_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_c_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_c_cuda_elg_csput_v + procedure, pass(a) :: is_host => c_cuda_elg_is_host + procedure, pass(a) :: is_dev => c_cuda_elg_is_dev + procedure, pass(a) :: is_sync => c_cuda_elg_is_sync + procedure, pass(a) :: set_host => c_cuda_elg_set_host + procedure, pass(a) :: set_dev => c_cuda_elg_set_dev + procedure, pass(a) :: set_sync => c_cuda_elg_set_sync + procedure, pass(a) :: sync => c_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_c_cuda_elg_asb + final :: c_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_c_elg_mold - procedure, pass(a) :: asb => psb_c_elg_asb + procedure, pass(a) :: mold => psb_c_cuda_elg_mold + procedure, pass(a) :: asb => psb_c_cuda_elg_asb #endif - end type psb_c_elg_sparse_mat + end type psb_c_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: c_elg_get_nzeros, c_elg_free, c_elg_get_fmt, & - & c_elg_get_size, c_elg_sizeof, c_elg_get_nz_row, c_elg_sync + private :: c_cuda_elg_get_nzeros, c_cuda_elg_free, c_cuda_elg_get_fmt, & + & c_cuda_elg_get_size, c_cuda_elg_sizeof, c_cuda_elg_get_nz_row, c_cuda_elg_sync interface - subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_vect_mv + end subroutine psb_c_cuda_elg_vect_mv end interface interface - subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_inner_vect_sv + end subroutine psb_c_cuda_elg_inner_vect_sv end interface interface - subroutine psb_c_elg_reallocate_nz(nz,a) - import :: psb_c_elg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_elg_reallocate_nz(nz,a) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_elg_reallocate_nz + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_reallocate_nz end interface interface - subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) - import :: psb_c_elg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_elg_allocate_mnnz + end subroutine psb_c_cuda_elg_allocate_mnnz end interface interface - subroutine psb_c_elg_mold(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_mold(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_mold + end subroutine psb_c_cuda_elg_mold end interface interface - subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_csput_a + end subroutine psb_c_cuda_elg_csput_a end interface interface - subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_c_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& + subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& & psb_i_base_vect_type - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_csput_v + end subroutine psb_c_cuda_elg_csput_v end interface interface - subroutine psb_c_elg_from_gpu(a,info) - import :: psb_c_elg_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_from_gpu(a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_from_gpu + end subroutine psb_c_cuda_elg_from_gpu end interface interface - subroutine psb_c_elg_to_gpu(a,info, nzrm) - import :: psb_c_elg_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_elg_to_gpu + end subroutine psb_c_cuda_elg_to_gpu end interface interface - subroutine psb_c_cp_elg_from_coo(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_elg_from_coo + end subroutine psb_c_cuda_cp_elg_from_coo end interface interface - subroutine psb_c_cp_elg_from_fmt(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_elg_from_fmt + end subroutine psb_c_cuda_cp_elg_from_fmt end interface interface - subroutine psb_c_mv_elg_from_coo(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_elg_from_coo + end subroutine psb_c_cuda_mv_elg_from_coo end interface interface - subroutine psb_c_mv_elg_from_fmt(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_elg_from_fmt + end subroutine psb_c_cuda_mv_elg_from_fmt end interface interface - subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_csmv + end subroutine psb_c_cuda_elg_csmv end interface interface - subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_csmm + end subroutine psb_c_cuda_elg_csmm end interface interface - subroutine psb_c_elg_scal(d,a,info, side) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_scal(d,a,info, side) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_elg_scal + end subroutine psb_c_cuda_elg_scal end interface interface - subroutine psb_c_elg_scals(d,a,info) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_scals(d,a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_scals + end subroutine psb_c_cuda_elg_scals end interface interface - subroutine psb_c_elg_asb(a) - import :: psb_c_elg_sparse_mat - class(psb_c_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_elg_asb + subroutine psb_c_cuda_elg_asb(a) + import :: psb_c_cuda_elg_sparse_mat + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function c_elg_sizeof(a) result(res) + function c_cuda_elg_sizeof(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_elg_sizeof + end function c_cuda_elg_sizeof - function c_elg_get_fmt() result(res) + function c_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function c_elg_get_fmt + end function c_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine c_elg_reinit(a,clear) + subroutine c_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine c_elg_reinit + end subroutine c_cuda_elg_reinit - subroutine c_elg_free(a) + subroutine c_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine c_elg_free + end subroutine c_cuda_elg_free - subroutine c_elg_sync(a) + subroutine c_cuda_elg_sync(a) implicit none - class(psb_c_elg_sparse_mat), target, intent(in) :: a - class(psb_c_elg_sparse_mat), pointer :: tmpa + class(psb_c_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine c_elg_sync + end subroutine c_cuda_elg_sync - subroutine c_elg_set_host(a) + subroutine c_cuda_elg_set_host(a) implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine c_elg_set_host + end subroutine c_cuda_elg_set_host - subroutine c_elg_set_dev(a) + subroutine c_cuda_elg_set_dev(a) implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine c_elg_set_dev + end subroutine c_cuda_elg_set_dev - subroutine c_elg_set_sync(a) + subroutine c_cuda_elg_set_sync(a) implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine c_elg_set_sync + end subroutine c_cuda_elg_set_sync - function c_elg_is_dev(a) result(res) + function c_cuda_elg_is_dev(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function c_elg_is_dev + end function c_cuda_elg_is_dev - function c_elg_is_host(a) result(res) + function c_cuda_elg_is_host(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function c_elg_is_host + end function c_cuda_elg_is_host - function c_elg_is_sync(a) result(res) + function c_cuda_elg_is_sync(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function c_elg_is_sync + end function c_cuda_elg_is_sync - subroutine c_elg_finalize(a) + subroutine c_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_c_elg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_elg_finalize + end subroutine c_cuda_elg_finalize #else interface - subroutine psb_c_elg_asb(a) - import :: psb_c_elg_sparse_mat - class(psb_c_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_elg_asb + subroutine psb_c_cuda_elg_asb(a) + import :: psb_c_cuda_elg_sparse_mat + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_asb end interface interface - subroutine psb_c_elg_mold(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_mold(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_mold + end subroutine psb_c_cuda_elg_mold end interface #endif -end module psb_c_elg_mat_mod +end module psb_c_cuda_elg_mat_mod diff --git a/cuda/psb_c_hdiag_mat_mod.F90 b/cuda/psb_c_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_c_hdiag_mat_mod.F90 rename to cuda/psb_c_cuda_hdiag_mat_mod.F90 index 8206abed..54f47684 100644 --- a/cuda/psb_c_hdiag_mat_mod.F90 +++ b/cuda/psb_c_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_c_hdiag_mat_mod +module psb_c_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_c_hdia_mat_mod - type, extends(psb_c_hdia_sparse_mat) :: psb_c_hdiag_sparse_mat + type, extends(psb_c_hdia_sparse_mat) :: psb_c_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => c_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => c_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_c_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_c_hdiag_csmm - procedure, pass(a) :: csmv => psb_c_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_c_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_c_hdiag_scals - ! procedure, pass(a) :: scalv => psb_c_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_c_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_c_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_c_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_c_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_c_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_c_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_c_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_c_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_hdiag_from_fmt - procedure, pass(a) :: free => c_hdiag_free - procedure, pass(a) :: mold => psb_c_hdiag_mold - procedure, pass(a) :: to_gpu => psb_c_hdiag_to_gpu - final :: c_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => c_cuda_hdiag_free + procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hdiag_to_gpu + final :: c_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_c_hdiag_mold + procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold #endif - end type psb_c_hdiag_sparse_mat + end type psb_c_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: c_hdiag_get_nzeros, c_hdiag_free, c_hdiag_get_fmt, & - & c_hdiag_get_size, c_hdiag_sizeof, c_hdiag_get_nz_row + private :: c_cuda_hdiag_get_nzeros, c_cuda_hdiag_free, c_cuda_hdiag_get_fmt, & + & c_cuda_hdiag_get_size, c_cuda_hdiag_sizeof, c_cuda_hdiag_get_nz_row interface - subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hdiag_vect_mv + end subroutine psb_c_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_c_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type -!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_c_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_hdiag_inner_vect_sv +!!$ end subroutine psb_c_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_c_hdiag_reallocate_nz +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_c_hdiag_allocate_mnnz +!!$ end subroutine psb_c_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_c_hdiag_mold(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_mold(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hdiag_mold + end subroutine psb_c_cuda_hdiag_mold end interface interface - subroutine psb_c_hdiag_to_gpu(a,info) - import :: psb_c_hdiag_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hdiag_to_gpu(a,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hdiag_to_gpu + end subroutine psb_c_cuda_hdiag_to_gpu end interface interface - subroutine psb_c_cp_hdiag_from_coo(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hdiag_from_coo + end subroutine psb_c_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_c_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_c_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_cp_hdiag_from_fmt +!!$ end subroutine psb_c_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_c_mv_hdiag_from_coo(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hdiag_from_coo + end subroutine psb_c_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_c_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_c_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_mv_hdiag_from_fmt +!!$ end subroutine psb_c_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hdiag_csmv + end subroutine psb_c_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_c_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_hdiag_csmm +!!$ end subroutine psb_c_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_scal(d,a,info, side) -!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_c_hdiag_scal +!!$ end subroutine psb_c_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_scals(d,a,info) -!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_hdiag_scals +!!$ end subroutine psb_c_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function c_hdiag_get_fmt() result(res) + function c_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function c_hdiag_get_fmt + end function c_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine c_hdiag_free(a) + subroutine c_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine c_hdiag_free + end subroutine c_cuda_hdiag_free - subroutine c_hdiag_finalize(a) + subroutine c_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_c_hdiag_sparse_mat), intent(inout) :: a + type(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_c_hdia_sparse_mat%free() return - end subroutine c_hdiag_finalize + end subroutine c_cuda_hdiag_finalize #else interface - subroutine psb_c_hdiag_mold(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_mold(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hdiag_mold + end subroutine psb_c_cuda_hdiag_mold end interface #endif -end module psb_c_hdiag_mat_mod +end module psb_c_cuda_hdiag_mat_mod diff --git a/cuda/psb_c_hlg_mat_mod.F90 b/cuda/psb_c_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_c_hlg_mat_mod.F90 rename to cuda/psb_c_cuda_hlg_mat_mod.F90 index 9236a202..74284f30 100644 --- a/cuda/psb_c_hlg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_c_hlg_mat_mod +module psb_c_cuda_hlg_mat_mod use iso_c_binding use psb_c_mat_mod @@ -41,7 +41,7 @@ module psb_c_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_hll_sparse_mat) :: psb_c_hlg_sparse_mat + type, extends(psb_c_hll_sparse_mat) :: psb_c_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_c_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => c_hlg_get_fmt - procedure, pass(a) :: sizeof => c_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_c_hlg_vect_mv - procedure, pass(a) :: csmm => psb_c_hlg_csmm - procedure, pass(a) :: csmv => psb_c_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_c_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_c_hlg_scals - procedure, pass(a) :: scalv => psb_c_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_c_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_hlg_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_hlg_from_fmt - procedure, pass(a) :: free => c_hlg_free - procedure, pass(a) :: mold => psb_c_hlg_mold - procedure, pass(a) :: is_host => c_hlg_is_host - procedure, pass(a) :: is_dev => c_hlg_is_dev - procedure, pass(a) :: is_sync => c_hlg_is_sync - procedure, pass(a) :: set_host => c_hlg_set_host - procedure, pass(a) :: set_dev => c_hlg_set_dev - procedure, pass(a) :: set_sync => c_hlg_set_sync - procedure, pass(a) :: sync => c_hlg_sync - procedure, pass(a) :: from_gpu => psb_c_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_c_hlg_to_gpu - final :: c_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => c_cuda_hlg_free + procedure, pass(a) :: mold => psb_c_cuda_hlg_mold + procedure, pass(a) :: is_host => c_cuda_hlg_is_host + procedure, pass(a) :: is_dev => c_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => c_cuda_hlg_is_sync + procedure, pass(a) :: set_host => c_cuda_hlg_set_host + procedure, pass(a) :: set_dev => c_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => c_cuda_hlg_set_sync + procedure, pass(a) :: sync => c_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_hlg_to_gpu + final :: c_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_c_hlg_mold + procedure, pass(a) :: mold => psb_c_cuda_hlg_mold #endif - end type psb_c_hlg_sparse_mat + end type psb_c_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: c_hlg_get_nzeros, c_hlg_free, c_hlg_get_fmt, & - & c_hlg_get_size, c_hlg_sizeof, c_hlg_get_nz_row + private :: c_cuda_hlg_get_nzeros, c_cuda_hlg_free, c_cuda_hlg_get_fmt, & + & c_cuda_hlg_get_size, c_cuda_hlg_sizeof, c_cuda_hlg_get_nz_row interface - subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_vect_mv + end subroutine psb_c_cuda_hlg_vect_mv end interface interface - subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_inner_vect_sv + end subroutine psb_c_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_c_hlg_reallocate_nz(nz,a) - import :: psb_c_hlg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_c_hlg_reallocate_nz + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hlg_reallocate_nz end interface interface - subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_c_hlg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_hlg_allocate_mnnz + end subroutine psb_c_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_c_hlg_mold(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_mold(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_mold + end subroutine psb_c_cuda_hlg_mold end interface interface - subroutine psb_c_hlg_from_gpu(a,info) - import :: psb_c_hlg_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_from_gpu(a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_from_gpu + end subroutine psb_c_cuda_hlg_from_gpu end interface interface - subroutine psb_c_hlg_to_gpu(a,info, nzrm) - import :: psb_c_hlg_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_hlg_to_gpu + end subroutine psb_c_cuda_hlg_to_gpu end interface interface - subroutine psb_c_cp_hlg_from_coo(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hlg_from_coo + end subroutine psb_c_cuda_cp_hlg_from_coo end interface interface - subroutine psb_c_cp_hlg_from_fmt(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hlg_from_fmt + end subroutine psb_c_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_c_mv_hlg_from_coo(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hlg_from_coo + end subroutine psb_c_cuda_mv_hlg_from_coo end interface interface - subroutine psb_c_mv_hlg_from_fmt(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hlg_from_fmt + end subroutine psb_c_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_csmv + end subroutine psb_c_cuda_hlg_csmv end interface interface - subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_csmm + end subroutine psb_c_cuda_hlg_csmm end interface interface - subroutine psb_c_hlg_scal(d,a,info, side) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_scal(d,a,info, side) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_hlg_scal + end subroutine psb_c_cuda_hlg_scal end interface interface - subroutine psb_c_hlg_scals(d,a,info) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_scals(d,a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_scals + end subroutine psb_c_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function c_hlg_sizeof(a) result(res) + function c_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_hlg_sizeof + end function c_cuda_hlg_sizeof - function c_hlg_get_fmt() result(res) + function c_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function c_hlg_get_fmt + end function c_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine c_hlg_free(a) + subroutine c_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine c_hlg_free + end subroutine c_cuda_hlg_free - subroutine c_hlg_sync(a) + subroutine c_cuda_hlg_sync(a) implicit none - class(psb_c_hlg_sparse_mat), target, intent(in) :: a - class(psb_c_hlg_sparse_mat), pointer :: tmpa + class(psb_c_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine c_hlg_sync + end subroutine c_cuda_hlg_sync - subroutine c_hlg_set_host(a) + subroutine c_cuda_hlg_set_host(a) implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine c_hlg_set_host + end subroutine c_cuda_hlg_set_host - subroutine c_hlg_set_dev(a) + subroutine c_cuda_hlg_set_dev(a) implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine c_hlg_set_dev + end subroutine c_cuda_hlg_set_dev - subroutine c_hlg_set_sync(a) + subroutine c_cuda_hlg_set_sync(a) implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine c_hlg_set_sync + end subroutine c_cuda_hlg_set_sync - function c_hlg_is_dev(a) result(res) + function c_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function c_hlg_is_dev + end function c_cuda_hlg_is_dev - function c_hlg_is_host(a) result(res) + function c_cuda_hlg_is_host(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function c_hlg_is_host + end function c_cuda_hlg_is_host - function c_hlg_is_sync(a) result(res) + function c_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function c_hlg_is_sync + end function c_cuda_hlg_is_sync - subroutine c_hlg_finalize(a) + subroutine c_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_c_hlg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_hlg_finalize + end subroutine c_cuda_hlg_finalize #else interface - subroutine psb_c_hlg_mold(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_mold(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_mold + end subroutine psb_c_cuda_hlg_mold end interface #endif -end module psb_c_hlg_mat_mod +end module psb_c_cuda_hlg_mat_mod diff --git a/cuda/psb_c_hybg_mat_mod.F90 b/cuda/psb_c_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_c_hybg_mat_mod.F90 rename to cuda/psb_c_cuda_hybg_mat_mod.F90 index d5c605ec..d16988ba 100644 --- a/cuda/psb_c_hybg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_c_hybg_mat_mod +module psb_c_cuda_hybg_mat_mod use iso_c_binding use psb_c_mat_mod use cusparse_mod - type, extends(psb_c_csr_sparse_mat) :: psb_c_hybg_sparse_mat + type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_c_hybg_mat_mod type(c_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => c_hybg_get_fmt - procedure, pass(a) :: sizeof => c_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_c_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_c_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_hybg_csmm - procedure, pass(a) :: csmv => psb_c_hybg_csmv - procedure, pass(a) :: scals => psb_c_hybg_scals - procedure, pass(a) :: scalv => psb_c_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_c_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_hybg_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_c_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_hybg_from_fmt - procedure, pass(a) :: free => c_hybg_free - procedure, pass(a) :: mold => psb_c_hybg_mold - procedure, pass(a) :: to_gpu => psb_c_hybg_to_gpu - final :: c_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => c_cuda_hybg_free + procedure, pass(a) :: mold => psb_c_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hybg_to_gpu + final :: c_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_c_hybg_mold + procedure, pass(a) :: mold => psb_c_cuda_hybg_mold #endif - end type psb_c_hybg_sparse_mat + end type psb_c_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: c_hybg_get_nzeros, c_hybg_free, c_hybg_get_fmt, & - & c_hybg_get_size, c_hybg_sizeof, c_hybg_get_nz_row + private :: c_cuda_hybg_get_nzeros, c_cuda_hybg_free, c_cuda_hybg_get_fmt, & + & c_cuda_hybg_get_size, c_cuda_hybg_sizeof, c_cuda_hybg_get_nz_row interface - subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_inner_vect_sv + end subroutine psb_c_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_vect_mv + end subroutine psb_c_cuda_hybg_vect_mv end interface interface - subroutine psb_c_hybg_reallocate_nz(nz,a) - import :: psb_c_hybg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_c_hybg_reallocate_nz + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hybg_reallocate_nz end interface interface - subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_c_hybg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_hybg_allocate_mnnz + end subroutine psb_c_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_c_hybg_mold(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_mold(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hybg_mold + end subroutine psb_c_cuda_hybg_mold end interface interface - subroutine psb_c_hybg_to_gpu(a,info, nzrm) - import :: psb_c_hybg_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_hybg_to_gpu + end subroutine psb_c_cuda_hybg_to_gpu end interface interface - subroutine psb_c_cp_hybg_from_coo(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hybg_from_coo + end subroutine psb_c_cuda_cp_hybg_from_coo end interface interface - subroutine psb_c_cp_hybg_from_fmt(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hybg_from_fmt + end subroutine psb_c_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_c_mv_hybg_from_coo(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hybg_from_coo + end subroutine psb_c_cuda_mv_hybg_from_coo end interface interface - subroutine psb_c_mv_hybg_from_fmt(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hybg_from_fmt + end subroutine psb_c_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_csmv + end subroutine psb_c_cuda_hybg_csmv end interface interface - subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_csmm + end subroutine psb_c_cuda_hybg_csmm end interface interface - subroutine psb_c_hybg_scal(d,a,info,side) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hybg_scal(d,a,info,side) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_hybg_scal + end subroutine psb_c_cuda_hybg_scal end interface interface - subroutine psb_c_hybg_scals(d,a,info) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hybg_scals(d,a,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hybg_scals + end subroutine psb_c_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function c_hybg_sizeof(a) result(res) + function c_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + (2*psb_sizeof_sp) * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_hybg_sizeof + end function c_cuda_hybg_sizeof - function c_hybg_get_fmt() result(res) + function c_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function c_hybg_get_fmt + end function c_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine c_hybg_free(a) + subroutine c_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_c_csr_sparse_mat%free() return - end subroutine c_hybg_free + end subroutine c_cuda_hybg_free - subroutine c_hybg_finalize(a) + subroutine c_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_c_hybg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine c_hybg_finalize + end subroutine c_cuda_hybg_finalize #else interface - subroutine psb_c_hybg_mold(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_mold(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hybg_mold + end subroutine psb_c_cuda_hybg_mold end interface #endif -end module psb_c_hybg_mat_mod +end module psb_c_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_c_gpu_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_c_gpu_vect_mod.F90 rename to cuda/psb_c_cuda_vect_mod.F90 index 4c31154f..be06167e 100644 --- a/cuda/psb_c_gpu_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_c_gpu_vect_mod +module psb_c_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_c_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_c_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_c_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_base_vect_type) :: psb_c_vect_gpu + type, extends(psb_c_base_vect_type) :: psb_c_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_c_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => c_gpu_get_nrows - procedure, nopass :: get_fmt => c_gpu_get_fmt - - procedure, pass(x) :: all => c_gpu_all - procedure, pass(x) :: zero => c_gpu_zero - procedure, pass(x) :: asb_m => c_gpu_asb_m - procedure, pass(x) :: sync => c_gpu_sync - procedure, pass(x) :: sync_space => c_gpu_sync_space - procedure, pass(x) :: bld_x => c_gpu_bld_x - procedure, pass(x) :: bld_mn => c_gpu_bld_mn - procedure, pass(x) :: free => c_gpu_free - procedure, pass(x) :: ins_a => c_gpu_ins_a - procedure, pass(x) :: ins_v => c_gpu_ins_v - procedure, pass(x) :: is_host => c_gpu_is_host - procedure, pass(x) :: is_dev => c_gpu_is_dev - procedure, pass(x) :: is_sync => c_gpu_is_sync - procedure, pass(x) :: set_host => c_gpu_set_host - procedure, pass(x) :: set_dev => c_gpu_set_dev - procedure, pass(x) :: set_sync => c_gpu_set_sync - procedure, pass(x) :: set_scal => c_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => c_gpu_set_vect - procedure, pass(x) :: gthzv_x => c_gpu_gthzv_x - procedure, pass(y) :: sctb => c_gpu_sctb - procedure, pass(y) :: sctb_x => c_gpu_sctb_x - procedure, pass(x) :: gthzbuf => c_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => c_gpu_sctb_buf - procedure, pass(x) :: new_buffer => c_gpu_new_buffer - procedure, nopass :: device_wait => c_gpu_device_wait - procedure, pass(x) :: free_buffer => c_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => c_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => c_gpu_dot_v - procedure, pass(x) :: dot_a => c_gpu_dot_a - procedure, pass(y) :: axpby_v => c_gpu_axpby_v - procedure, pass(y) :: axpby_a => c_gpu_axpby_a - procedure, pass(y) :: mlt_v => c_gpu_mlt_v - procedure, pass(y) :: mlt_a => c_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => c_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => c_gpu_mlt_v_2 - procedure, pass(x) :: scal => c_gpu_scal - procedure, pass(x) :: nrm2 => c_gpu_nrm2 - procedure, pass(x) :: amax => c_gpu_amax - procedure, pass(x) :: asum => c_gpu_asum - procedure, pass(x) :: absval1 => c_gpu_absval1 - procedure, pass(x) :: absval2 => c_gpu_absval2 - - final :: c_gpu_vect_finalize + procedure, pass(x) :: get_nrows => c_cuda_get_nrows + procedure, nopass :: get_fmt => c_cuda_get_fmt + + procedure, pass(x) :: all => c_cuda_all + procedure, pass(x) :: zero => c_cuda_zero + procedure, pass(x) :: asb_m => c_cuda_asb_m + procedure, pass(x) :: sync => c_cuda_sync + procedure, pass(x) :: sync_space => c_cuda_sync_space + procedure, pass(x) :: bld_x => c_cuda_bld_x + procedure, pass(x) :: bld_mn => c_cuda_bld_mn + procedure, pass(x) :: free => c_cuda_free + procedure, pass(x) :: ins_a => c_cuda_ins_a + procedure, pass(x) :: ins_v => c_cuda_ins_v + procedure, pass(x) :: is_host => c_cuda_is_host + procedure, pass(x) :: is_dev => c_cuda_is_dev + procedure, pass(x) :: is_sync => c_cuda_is_sync + procedure, pass(x) :: set_host => c_cuda_set_host + procedure, pass(x) :: set_dev => c_cuda_set_dev + procedure, pass(x) :: set_sync => c_cuda_set_sync + procedure, pass(x) :: set_scal => c_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => c_cuda_set_vect + procedure, pass(x) :: gthzv_x => c_cuda_gthzv_x + procedure, pass(y) :: sctb => c_cuda_sctb + procedure, pass(y) :: sctb_x => c_cuda_sctb_x + procedure, pass(x) :: gthzbuf => c_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => c_cuda_sctb_buf + procedure, pass(x) :: new_buffer => c_cuda_new_buffer + procedure, nopass :: device_wait => c_cuda_device_wait + procedure, pass(x) :: free_buffer => c_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => c_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => c_cuda_dot_v + procedure, pass(x) :: dot_a => c_cuda_dot_a + procedure, pass(y) :: axpby_v => c_cuda_axpby_v + procedure, pass(y) :: axpby_a => c_cuda_axpby_a + procedure, pass(y) :: mlt_v => c_cuda_mlt_v + procedure, pass(y) :: mlt_a => c_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => c_cuda_mlt_v_2 + procedure, pass(x) :: scal => c_cuda_scal + procedure, pass(x) :: nrm2 => c_cuda_nrm2 + procedure, pass(x) :: amax => c_cuda_amax + procedure, pass(x) :: asum => c_cuda_asum + procedure, pass(x) :: absval1 => c_cuda_absval1 + procedure, pass(x) :: absval2 => c_cuda_absval2 + + final :: c_cuda_vect_finalize #endif - end type psb_c_vect_gpu + end type psb_c_vect_cuda - public :: psb_c_vect_gpu_ + public :: psb_c_vect_cuda_ private :: constructor - interface psb_c_vect_gpu_ + interface psb_c_vect_cuda_ module procedure constructor - end interface psb_c_vect_gpu_ + end interface psb_c_vect_cuda_ contains function constructor(x) result(this) complex(psb_spk_) :: x(:) - type(psb_c_vect_gpu) :: this + type(psb_c_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine c_gpu_device_wait() + subroutine c_cuda_device_wait() call psb_cudaSync() - end subroutine c_gpu_device_wait + end subroutine c_cuda_device_wait - subroutine c_gpu_new_buffer(n,x,info) + subroutine c_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine c_gpu_gthzv_x + end subroutine c_cuda_gthzv_x - subroutine c_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine c_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_c_vect_gpu) :: x + class(psb_c_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine c_gpu_gthzbuf + end subroutine c_cuda_gthzbuf - subroutine c_gpu_sctb(n,idx,x,beta,y) + subroutine c_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: beta, x(:) - class(psb_c_vect_gpu) :: y + class(psb_c_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_c_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine c_gpu_sctb + end subroutine c_cuda_sctb - subroutine c_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine c_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta, x(:) - class(psb_c_vect_gpu) :: y + class(psb_c_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine c_gpu_sctb_x + end subroutine c_cuda_sctb_x - subroutine c_gpu_sctb_buf(i,n,idx,beta,y) + subroutine c_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta - class(psb_c_vect_gpu) :: y + class(psb_c_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine c_gpu_sctb_buf + end subroutine c_cuda_sctb_buf - subroutine c_gpu_bld_x(x,this) + subroutine c_cuda_bld_x(x,this) use psb_base_mod complex(psb_spk_), intent(in) :: this(:) - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_bld_x',& + call psb_errpush(info,'c_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine c_gpu_bld_x + end subroutine c_cuda_bld_x - subroutine c_gpu_bld_mn(x,n) + subroutine c_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'c_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'c_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine c_gpu_bld_mn + end subroutine c_cuda_bld_mn - subroutine c_gpu_set_host(x) + subroutine c_cuda_set_host(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine c_gpu_set_host + end subroutine c_cuda_set_host - subroutine c_gpu_set_dev(x) + subroutine c_cuda_set_dev(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine c_gpu_set_dev + end subroutine c_cuda_set_dev - subroutine c_gpu_set_sync(x) + subroutine c_cuda_set_sync(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine c_gpu_set_sync + end subroutine c_cuda_set_sync - function c_gpu_is_dev(x) result(res) + function c_cuda_is_dev(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function c_gpu_is_dev + end function c_cuda_is_dev - function c_gpu_is_host(x) result(res) + function c_cuda_is_host(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function c_gpu_is_host + end function c_cuda_is_host - function c_gpu_is_sync(x) result(res) + function c_cuda_is_sync(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function c_gpu_is_sync + end function c_cuda_is_sync - function c_gpu_get_nrows(x) result(res) + function c_cuda_get_nrows(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function c_gpu_get_nrows + end function c_cuda_get_nrows - function c_gpu_get_fmt() result(res) + function c_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'cGPU' - end function c_gpu_get_fmt + end function c_cuda_get_fmt - subroutine c_gpu_all(n, x, info) + subroutine c_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_c_vect_gpu), intent(out) :: x + class(psb_c_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_all',& + call psb_errpush(info,'c_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine c_gpu_all + end subroutine c_cuda_all - subroutine c_gpu_zero(x) + subroutine c_cuda_zero(x) use psi_serial_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=czero call x%set_host() - end subroutine c_gpu_zero + end subroutine c_cuda_zero - subroutine c_gpu_asb_m(n, x, info) + subroutine c_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine c_gpu_asb_m + end subroutine c_cuda_asb_m - subroutine c_gpu_sync_space(x,info) + subroutine c_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine c_gpu_sync_space + end subroutine c_cuda_sync_space - subroutine c_gpu_sync(x) + subroutine c_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'c_gpu_sync') + call psb_errpush(info,'c_cuda_sync') end if - end subroutine c_gpu_sync + end subroutine c_cuda_sync - subroutine c_gpu_free(x, info) + subroutine c_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine c_gpu_free + end subroutine c_cuda_free - subroutine c_gpu_set_scal(x,val,first,last) - class(psb_c_vect_gpu), intent(inout) :: x + subroutine c_cuda_set_scal(x,val,first,last) + class(psb_c_vect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine c_gpu_set_scal + end subroutine c_cuda_set_scal !!$ -!!$ subroutine c_gpu_set_vect(x,val) -!!$ class(psb_c_vect_gpu), intent(inout) :: x +!!$ subroutine c_cuda_set_vect(x,val) +!!$ class(psb_c_vect_cuda), intent(inout) :: x !!$ complex(psb_spk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_c_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine c_gpu_set_vect +!!$ end subroutine c_cuda_set_vect - function c_gpu_dot_v(n,x,y) result(res) + function c_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n complex(psb_spk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_c_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'c_gpu_dot_v') + call psb_errpush(info,'c_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function c_gpu_dot_v + end function c_cuda_dot_v - function c_gpu_dot_a(n,x,y) result(res) + function c_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n complex(psb_spk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function c_gpu_dot_a + end function c_cuda_dot_a - subroutine c_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine c_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y complex(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) ! Do something different here if ((beta /= czero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine c_gpu_axpby_v + end subroutine c_cuda_axpby_v - subroutine c_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine c_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m complex(psb_spk_), intent(in) :: x(:) - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y complex(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine c_gpu_axpby_a + end subroutine c_cuda_axpby_a - subroutine c_gpu_mlt_v(x, y, info) + subroutine c_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine c_gpu_mlt_v + end subroutine c_cuda_mlt_v - subroutine c_gpu_mlt_a(x, y, info) + subroutine c_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none complex(psb_spk_), intent(in) :: x(:) - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_c_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine c_gpu_mlt_a + end subroutine c_cuda_mlt_a - subroutine c_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine c_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none complex(psb_spk_), intent(in) :: alpha,beta complex(psb_spk_), intent(in) :: x(:) complex(psb_spk_), intent(in) :: y(:) - class(psb_c_vect_gpu), intent(inout) :: z + class(psb_c_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine c_gpu_mlt_a_2 + end subroutine c_cuda_mlt_a_2 - subroutine c_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine c_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none complex(psb_spk_), intent(in) :: alpha,beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y - class(psb_c_vect_gpu), intent(inout) :: z + class(psb_c_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type (yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= czero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine c_gpu_mlt_v_2 + end subroutine c_cuda_mlt_v_2 - subroutine c_gpu_scal(alpha, x) + subroutine c_cuda_scal(alpha, x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x complex(psb_spk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine c_gpu_scal + end subroutine c_cuda_scal - function c_gpu_nrm2(n,x) result(res) + function c_cuda_nrm2(n,x) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) - end function c_gpu_nrm2 + end function c_cuda_nrm2 - function c_gpu_amax(n,x) result(res) + function c_cuda_amax(n,x) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) - end function c_gpu_amax + end function c_cuda_amax - function c_gpu_asum(n,x) result(res) + function c_cuda_asum(n,x) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDeviceComplex(res,n,x%deviceVect) - end function c_gpu_asum + end function c_cuda_asum - subroutine c_gpu_absval1(x) + subroutine c_cuda_absval1(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,cone,x%deviceVect) - end subroutine c_gpu_absval1 + end subroutine c_cuda_absval1 - subroutine c_gpu_absval2(x,y) + subroutine c_cuda_absval2(x,y) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_c_vect_gpu) + class is (psb_c_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,cone,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_c_base_vect_type%absval(y) end select - end subroutine c_gpu_absval2 + end subroutine c_cuda_absval2 - subroutine c_gpu_vect_finalize(x) + subroutine c_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_c_vect_gpu), intent(inout) :: x + type(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine c_gpu_vect_finalize + end subroutine c_cuda_vect_finalize - subroutine c_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine c_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_c_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_c_vect_gpu) + class is (psb_c_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceFloatComplex(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine c_gpu_ins_v + end subroutine c_cuda_ins_v - subroutine c_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine c_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_spk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_c_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine c_gpu_ins_a + end subroutine c_cuda_ins_a #endif -end module psb_c_gpu_vect_mod +end module psb_c_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_c_gpu_vect_mod -module psb_c_gpu_multivect_mod +module psb_c_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_c_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_c_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_c_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_base_multivect_type) :: psb_c_multivect_gpu + type, extends(psb_c_base_multivect_type) :: psb_c_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_c_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => c_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => c_gpu_multi_get_ncols - procedure, nopass :: get_fmt => c_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => c_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => c_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => c_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => c_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => c_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => c_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => c_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => c_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => c_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => c_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => c_gpu_multi_amax -!!$ procedure, pass(x) :: asum => c_gpu_multi_asum - procedure, pass(x) :: all => c_gpu_multi_all - procedure, pass(x) :: zero => c_gpu_multi_zero - procedure, pass(x) :: asb => c_gpu_multi_asb - procedure, pass(x) :: sync => c_gpu_multi_sync - procedure, pass(x) :: sync_space => c_gpu_multi_sync_space - procedure, pass(x) :: bld_x => c_gpu_multi_bld_x - procedure, pass(x) :: bld_n => c_gpu_multi_bld_n - procedure, pass(x) :: free => c_gpu_multi_free - procedure, pass(x) :: ins => c_gpu_multi_ins - procedure, pass(x) :: is_host => c_gpu_multi_is_host - procedure, pass(x) :: is_dev => c_gpu_multi_is_dev - procedure, pass(x) :: is_sync => c_gpu_multi_is_sync - procedure, pass(x) :: set_host => c_gpu_multi_set_host - procedure, pass(x) :: set_dev => c_gpu_multi_set_dev - procedure, pass(x) :: set_sync => c_gpu_multi_set_sync - procedure, pass(x) :: set_scal => c_gpu_multi_set_scal - procedure, pass(x) :: set_vect => c_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => c_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => c_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => c_gpu_multi_sctb_x - final :: c_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => c_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => c_cuda_multi_get_ncols + procedure, nopass :: get_fmt => c_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => c_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => c_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => c_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => c_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => c_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => c_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => c_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => c_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => c_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => c_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => c_cuda_multi_amax +!!$ procedure, pass(x) :: asum => c_cuda_multi_asum + procedure, pass(x) :: all => c_cuda_multi_all + procedure, pass(x) :: zero => c_cuda_multi_zero + procedure, pass(x) :: asb => c_cuda_multi_asb + procedure, pass(x) :: sync => c_cuda_multi_sync + procedure, pass(x) :: sync_space => c_cuda_multi_sync_space + procedure, pass(x) :: bld_x => c_cuda_multi_bld_x + procedure, pass(x) :: bld_n => c_cuda_multi_bld_n + procedure, pass(x) :: free => c_cuda_multi_free + procedure, pass(x) :: ins => c_cuda_multi_ins + procedure, pass(x) :: is_host => c_cuda_multi_is_host + procedure, pass(x) :: is_dev => c_cuda_multi_is_dev + procedure, pass(x) :: is_sync => c_cuda_multi_is_sync + procedure, pass(x) :: set_host => c_cuda_multi_set_host + procedure, pass(x) :: set_dev => c_cuda_multi_set_dev + procedure, pass(x) :: set_sync => c_cuda_multi_set_sync + procedure, pass(x) :: set_scal => c_cuda_multi_set_scal + procedure, pass(x) :: set_vect => c_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => c_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => c_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => c_cuda_multi_sctb_x + final :: c_cuda_multi_vect_finalize #endif - end type psb_c_multivect_gpu + end type psb_c_multivect_cuda - public :: psb_c_multivect_gpu + public :: psb_c_multivect_cuda private :: constructor - interface psb_c_multivect_gpu + interface psb_c_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) complex(psb_spk_) :: x(:,:) - type(psb_c_multivect_gpu) :: this + type(psb_c_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine c_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_spk_) :: y(:) -!!$ class(psb_c_multivect_gpu) :: x +!!$ class(psb_c_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine c_gpu_multi_gthzv_x +!!$ end subroutine c_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine c_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine c_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ complex(psb_spk_) :: beta, x(:) -!!$ class(psb_c_multivect_gpu) :: y +!!$ class(psb_c_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_c_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine c_gpu_multi_sctb +!!$ end subroutine c_cuda_multi_sctb !!$ -!!$ subroutine c_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine c_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_spk_) :: beta, x(:) -!!$ class(psb_c_multivect_gpu) :: y +!!$ class(psb_c_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine c_gpu_multi_sctb_x +!!$ end subroutine c_cuda_multi_sctb_x - subroutine c_gpu_multi_bld_x(x,this) + subroutine c_cuda_multi_bld_x(x,this) use psb_base_mod complex(psb_spk_), intent(in) :: this(:,:) - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_multi_bld_x',& + call psb_errpush(info,'c_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine c_gpu_multi_bld_x + end subroutine c_cuda_multi_bld_x - subroutine c_gpu_multi_bld_n(x,m,n) + subroutine c_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'c_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'c_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine c_gpu_multi_bld_n + end subroutine c_cuda_multi_bld_n - subroutine c_gpu_multi_set_host(x) + subroutine c_cuda_multi_set_host(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine c_gpu_multi_set_host + end subroutine c_cuda_multi_set_host - subroutine c_gpu_multi_set_dev(x) + subroutine c_cuda_multi_set_dev(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine c_gpu_multi_set_dev + end subroutine c_cuda_multi_set_dev - subroutine c_gpu_multi_set_sync(x) + subroutine c_cuda_multi_set_sync(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine c_gpu_multi_set_sync + end subroutine c_cuda_multi_set_sync - function c_gpu_multi_is_dev(x) result(res) + function c_cuda_multi_is_dev(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function c_gpu_multi_is_dev + end function c_cuda_multi_is_dev - function c_gpu_multi_is_host(x) result(res) + function c_cuda_multi_is_host(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function c_gpu_multi_is_host + end function c_cuda_multi_is_host - function c_gpu_multi_is_sync(x) result(res) + function c_cuda_multi_is_sync(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function c_gpu_multi_is_sync + end function c_cuda_multi_is_sync - function c_gpu_multi_get_nrows(x) result(res) + function c_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function c_gpu_multi_get_nrows + end function c_cuda_multi_get_nrows - function c_gpu_multi_get_ncols(x) result(res) + function c_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function c_gpu_multi_get_ncols + end function c_cuda_multi_get_ncols - function c_gpu_multi_get_fmt() result(res) + function c_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'cGPU' - end function c_gpu_multi_get_fmt + end function c_cuda_multi_get_fmt -!!$ function c_gpu_multi_dot_v(n,x,y) result(res) +!!$ function c_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ class(psb_c_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_spk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_c_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'c_gpu_multi_dot_v') +!!$ call psb_errpush(info,'c_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function c_gpu_multi_dot_v +!!$ end function c_cuda_multi_dot_v !!$ -!!$ function c_gpu_multi_dot_a(n,x,y) result(res) +!!$ function c_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ complex(psb_spk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_spk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function c_gpu_multi_dot_a +!!$ end function c_cuda_multi_dot_a !!$ -!!$ subroutine c_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine c_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_c_base_multivect_type), intent(inout) :: x -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ complex(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine c_gpu_multi_axpby_v +!!$ end subroutine c_cuda_multi_axpby_v !!$ -!!$ subroutine c_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine c_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ complex(psb_spk_), intent(in) :: x(:) -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ complex(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine c_gpu_multi_axpby_a +!!$ end subroutine c_cuda_multi_axpby_a !!$ -!!$ subroutine c_gpu_multi_mlt_v(x, y, info) +!!$ subroutine c_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_c_base_multivect_type), intent(inout) :: x -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine c_gpu_multi_mlt_v +!!$ end subroutine c_cuda_multi_mlt_v !!$ -!!$ subroutine c_gpu_multi_mlt_a(x, y, info) +!!$ subroutine c_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: x(:) -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_c_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine c_gpu_multi_mlt_a +!!$ end subroutine c_cuda_multi_mlt_a !!$ -!!$ subroutine c_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine c_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta !!$ complex(psb_spk_), intent(in) :: x(:) !!$ complex(psb_spk_), intent(in) :: y(:) -!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ class(psb_c_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine c_gpu_multi_mlt_a_2 +!!$ end subroutine c_cuda_multi_mlt_a_2 !!$ -!!$ subroutine c_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine c_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta !!$ class(psb_c_base_multivect_type), intent(inout) :: x !!$ class(psb_c_base_multivect_type), intent(inout) :: y -!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ class(psb_c_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine c_gpu_multi_mlt_v_2 +!!$ end subroutine c_cuda_multi_mlt_v_2 - subroutine c_gpu_multi_set_scal(x,val) - class(psb_c_multivect_gpu), intent(inout) :: x + subroutine c_cuda_multi_set_scal(x,val) + class(psb_c_multivect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_c_base_multivect_type%set_scal(val) call x%set_host() - end subroutine c_gpu_multi_set_scal + end subroutine c_cuda_multi_set_scal - subroutine c_gpu_multi_set_vect(x,val) - class(psb_c_multivect_gpu), intent(inout) :: x + subroutine c_cuda_multi_set_vect(x,val) + class(psb_c_multivect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_c_base_multivect_type%set_vect(val) call x%set_host() - end subroutine c_gpu_multi_set_vect + end subroutine c_cuda_multi_set_vect -!!$ subroutine c_gpu_multi_scal(alpha, x) +!!$ subroutine c_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ complex(psb_spk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_c_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine c_gpu_multi_scal +!!$ end subroutine c_cuda_multi_scal !!$ !!$ -!!$ function c_gpu_multi_nrm2(n,x) result(res) +!!$ function c_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function c_gpu_multi_nrm2 +!!$ end function c_cuda_multi_nrm2 !!$ -!!$ function c_gpu_multi_amax(n,x) result(res) +!!$ function c_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function c_gpu_multi_amax +!!$ end function c_cuda_multi_amax !!$ -!!$ function c_gpu_multi_asum(n,x) result(res) +!!$ function c_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function c_gpu_multi_asum +!!$ end function c_cuda_multi_asum - subroutine c_gpu_multi_all(m,n, x, info) + subroutine c_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_multivect_gpu), intent(out) :: x + class(psb_c_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=czero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_multi_all',& + call psb_errpush(info,'c_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine c_gpu_multi_all + end subroutine c_cuda_multi_all - subroutine c_gpu_multi_zero(x) + subroutine c_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine c_gpu_multi_zero + end subroutine c_cuda_multi_zero - subroutine c_gpu_multi_asb(m,n, x, info) + subroutine c_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine c_gpu_multi_asb + end subroutine c_cuda_multi_asb - subroutine c_gpu_multi_sync_space(x,info) + subroutine c_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine c_gpu_multi_sync_space + end subroutine c_cuda_multi_sync_space - subroutine c_gpu_multi_sync(x) + subroutine c_cuda_multi_sync(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'c_gpu_multi_sync') + call psb_errpush(info,'c_cuda_multi_sync') end if - end subroutine c_gpu_multi_sync + end subroutine c_cuda_multi_sync - subroutine c_gpu_multi_free(x, info) + subroutine c_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine c_gpu_multi_free + end subroutine c_cuda_multi_free - subroutine c_gpu_multi_vect_finalize(x) + subroutine c_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_c_multivect_gpu), intent(inout) :: x + type(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine c_gpu_multi_vect_finalize + end subroutine c_cuda_multi_vect_finalize - subroutine c_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine c_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_spk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_c_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine c_gpu_multi_ins + end subroutine c_cuda_multi_ins #endif -end module psb_c_gpu_multivect_mod +end module psb_c_cuda_multivect_mod diff --git a/cuda/psb_gpu_env_mod.F90 b/cuda/psb_cuda_env_mod.F90 similarity index 74% rename from cuda/psb_gpu_env_mod.F90 rename to cuda/psb_cuda_env_mod.F90 index 0473f4ac..0d1d4ced 100644 --- a/cuda/psb_gpu_env_mod.F90 +++ b/cuda/psb_cuda_env_mod.F90 @@ -30,30 +30,30 @@ ! -module psb_gpu_env_mod +module psb_cuda_env_mod use psb_const_mod use iso_c_binding use base_cusparse_mod -! interface psb_gpu_init -! module procedure psb_gpu_init +! interface psb_cuda_init +! module procedure psb_cuda_init ! end interface #if defined(HAVE_CUDA) use core_mod interface - function psb_gpuGetHandle() & - & result(res) bind(c,name='psb_gpuGetHandle') + function psb_cudaGetHandle() & + & result(res) bind(c,name='psb_cudaGetHandle') use iso_c_binding type(c_ptr) :: res - end function psb_gpuGetHandle + end function psb_cudaGetHandle end interface interface - function psb_gpuGetStream() & - & result(res) bind(c,name='psb_gpuGetStream') + function psb_cudaGetStream() & + & result(res) bind(c,name='psb_cudaGetStream') use iso_c_binding type(c_ptr) :: res - end function psb_gpuGetStream + end function psb_cudaGetStream end interface interface @@ -66,11 +66,11 @@ module psb_gpu_env_mod end interface interface - function psb_cuda_getDeviceCount() & + function psb_cuda_inner_getDeviceCount() & & result(res) bind(c,name='getDeviceCount') use iso_c_binding integer(c_int) :: res - end function psb_cuda_getDeviceCount + end function psb_cuda_inner_getDeviceCount end interface interface @@ -92,39 +92,39 @@ module psb_gpu_env_mod interface - subroutine psb_gpuCreateHandle() & - & bind(c,name='psb_gpuCreateHandle') + subroutine psb_cudaCreateHandle() & + & bind(c,name='psb_cudaCreateHandle') use iso_c_binding - end subroutine psb_gpuCreateHandle + end subroutine psb_cudaCreateHandle end interface interface - subroutine psb_gpuSetStream(handle,stream) & - & bind(c,name='psb_gpuSetStream') + subroutine psb_cudaSetStream(handle,stream) & + & bind(c,name='psb_cudaSetStream') use iso_c_binding type(c_ptr), value :: handle, stream - end subroutine psb_gpuSetStream + end subroutine psb_cudaSetStream end interface interface - subroutine psb_gpuDestroyHandle() & - & bind(c,name='psb_gpuDestroyHandle') + subroutine psb_cudaDestroyHandle() & + & bind(c,name='psb_cudaDestroyHandle') use iso_c_binding - end subroutine psb_gpuDestroyHandle + end subroutine psb_cudaDestroyHandle end interface interface - subroutine psb_cudaReset() & + subroutine psb_cuda_innerReset() & & bind(c,name='cudaReset') use iso_c_binding - end subroutine psb_cudaReset + end subroutine psb_cuda_innerReset end interface interface - subroutine psb_gpuClose() & + subroutine psb_cuda_innerClose() & & bind(c,name='gpuClose') use iso_c_binding - end subroutine psb_gpuClose + end subroutine psb_cuda_innerClose end interface #endif @@ -180,15 +180,15 @@ module psb_gpu_env_mod Contains - function psb_gpu_get_maybe_free_buffer() result(res) + function psb_cuda_get_maybe_free_buffer() result(res) logical :: res res = gpu_do_maybe_free_buffer - end function psb_gpu_get_maybe_free_buffer + end function psb_cuda_get_maybe_free_buffer - subroutine psb_gpu_set_maybe_free_buffer(val) + subroutine psb_cuda_set_maybe_free_buffer(val) logical, intent(in) :: val gpu_do_maybe_free_buffer = val - end subroutine psb_gpu_set_maybe_free_buffer + end subroutine psb_cuda_set_maybe_free_buffer ! !!!!!!!!!!!!!!!!!!!!!! ! @@ -197,7 +197,7 @@ Contains ! !!!!!!!!!!!!!!!!!!!!!! - subroutine psb_gpu_init(ctxt,dev) + subroutine psb_cuda_init(ctxt,dev) use psb_penv_mod use psb_const_mod use psb_error_mod @@ -230,10 +230,10 @@ Contains end if if (info == 0) info = initFcusparse() if (info /= 0) then - call psb_errpush(psb_err_internal_error_,'psb_gpu_init') + call psb_errpush(psb_err_internal_error_,'psb_cuda_init') goto 9999 end if - call psb_gpuCreateHandle() + call psb_cudaCreateHandle() #endif call psb_erractionrestore(err_act) return @@ -241,80 +241,80 @@ Contains return - end subroutine psb_gpu_init + end subroutine psb_cuda_init - subroutine psb_gpu_DeviceSync() + subroutine psb_cuda_DeviceSync() #if defined(HAVE_CUDA) call psb_cudaSync() #endif - end subroutine psb_gpu_DeviceSync + end subroutine psb_cuda_DeviceSync - function psb_gpu_getDeviceCount() result(res) + function psb_cuda_getDeviceCount() result(res) integer :: res #if defined(HAVE_CUDA) - res = psb_cuda_getDeviceCount() + res = psb_cuda_inner_getDeviceCount() #else res = 0 #endif - end function psb_gpu_getDeviceCount + end function psb_cuda_getDeviceCount - subroutine psb_gpu_exit() + subroutine psb_cuda_exit() integer :: res res = closeFcusparse() - call psb_gpuClose() - call psb_cudaReset() - end subroutine psb_gpu_exit + call psb_cuda_innerClose() + call psb_cuda_innerReset() + end subroutine psb_cuda_exit - function psb_gpu_DeviceHasUVA() result(res) + function psb_cuda_DeviceHasUVA() result(res) logical :: res res = (psb_C_DeviceHasUVA() == 1) - end function psb_gpu_DeviceHasUVA + end function psb_cuda_DeviceHasUVA - function psb_gpu_MultiProcessors() result(res) + function psb_cuda_MultiProcessors() result(res) integer(psb_ipk_) :: res res = psb_C_get_MultiProcessors() - end function psb_gpu_MultiProcessors + end function psb_cuda_MultiProcessors - function psb_gpu_MaxRegistersPerBlock() result(res) + function psb_cuda_MaxRegistersPerBlock() result(res) integer(psb_ipk_) :: res res = psb_C_get_MaxRegistersPerBlock() - end function psb_gpu_MaxRegistersPerBlock + end function psb_cuda_MaxRegistersPerBlock - function psb_gpu_MaxThreadsPerMP() result(res) + function psb_cuda_MaxThreadsPerMP() result(res) integer(psb_ipk_) :: res res = psb_C_get_MaxThreadsPerMP() - end function psb_gpu_MaxThreadsPerMP + end function psb_cuda_MaxThreadsPerMP - function psb_gpu_WarpSize() result(res) + function psb_cuda_WarpSize() result(res) integer(psb_ipk_) :: res res = psb_C_get_WarpSize() - end function psb_gpu_WarpSize + end function psb_cuda_WarpSize - function psb_gpu_MemoryClockRate() result(res) + function psb_cuda_MemoryClockRate() result(res) integer(psb_ipk_) :: res res = psb_C_get_MemoryClockRate() - end function psb_gpu_MemoryClockRate + end function psb_cuda_MemoryClockRate - function psb_gpu_MemoryBusWidth() result(res) + function psb_cuda_MemoryBusWidth() result(res) integer(psb_ipk_) :: res res = psb_C_get_MemoryBusWidth() - end function psb_gpu_MemoryBusWidth + end function psb_cuda_MemoryBusWidth - function psb_gpu_MemoryPeakBandwidth() result(res) + function psb_cuda_MemoryPeakBandwidth() result(res) real(psb_dpk_) :: res ! Formula here: 2*ClockRate(KHz)*BusWidth(bit) ! normalization: bit/byte, KHz/MHz ! output: MBytes/s res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate() - end function psb_gpu_MemoryPeakBandwidth + end function psb_cuda_MemoryPeakBandwidth - function psb_gpu_DeviceName() result(res) + function psb_cuda_DeviceName() result(res) character(len=256) :: res character :: cstring(256) call psb_C_cpy_NameString(cstring) call stringc2f(cstring,res) - end function psb_gpu_DeviceName + end function psb_cuda_DeviceName subroutine stringc2f(cstring,fstring) @@ -337,4 +337,4 @@ Contains return end subroutine stringc2f -end module psb_gpu_env_mod +end module psb_cuda_env_mod diff --git a/cuda/psb_gpu_mod.F90 b/cuda/psb_cuda_mod.F90 similarity index 65% rename from cuda/psb_gpu_mod.F90 rename to cuda/psb_cuda_mod.F90 index 7eba8062..81ce3e31 100644 --- a/cuda/psb_gpu_mod.F90 +++ b/cuda/psb_cuda_mod.F90 @@ -30,60 +30,60 @@ ! -module psb_gpu_mod +module psb_cuda_mod use psb_const_mod - use psb_gpu_env_mod + use psb_cuda_env_mod - use psb_i_gpu_vect_mod - use psb_s_gpu_vect_mod - use psb_d_gpu_vect_mod - use psb_c_gpu_vect_mod - use psb_z_gpu_vect_mod + use psb_i_cuda_vect_mod + use psb_s_cuda_vect_mod + use psb_d_cuda_vect_mod + use psb_c_cuda_vect_mod + use psb_z_cuda_vect_mod - use psb_i_gpu_multivect_mod - use psb_s_gpu_multivect_mod - use psb_d_gpu_multivect_mod - use psb_c_gpu_multivect_mod - use psb_z_gpu_multivect_mod + use psb_i_cuda_multivect_mod + use psb_s_cuda_multivect_mod + use psb_d_cuda_multivect_mod + use psb_c_cuda_multivect_mod + use psb_z_cuda_multivect_mod use psb_d_ell_mat_mod - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod use psb_s_ell_mat_mod - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod use psb_z_ell_mat_mod - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod use psb_c_ell_mat_mod - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod use psb_s_hll_mat_mod - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod use psb_d_hll_mat_mod - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod use psb_c_hll_mat_mod - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod use psb_z_hll_mat_mod - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod - use psb_s_csrg_mat_mod - use psb_d_csrg_mat_mod - use psb_c_csrg_mat_mod - use psb_z_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #if CUDA_SHORT_VERSION <= 10 - use psb_s_hybg_mat_mod - use psb_d_hybg_mat_mod - use psb_c_hybg_mat_mod - use psb_z_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif - use psb_d_diag_mat_mod - use psb_d_hdiag_mat_mod + use psb_d_cuda_diag_mat_mod + use psb_d_cuda_hdiag_mat_mod - use psb_s_dnsg_mat_mod - use psb_d_dnsg_mat_mod - use psb_c_dnsg_mat_mod - use psb_z_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod - use psb_s_hdiag_mat_mod - ! use psb_s_diag_mat_mod + use psb_s_cuda_hdiag_mat_mod + ! use psb_s_cuda_diag_mat_mod -end module psb_gpu_mod +end module psb_cuda_mod diff --git a/cuda/psb_d_csrg_mat_mod.F90 b/cuda/psb_d_csrg_mat_mod.F90 deleted file mode 100644 index 177c7440..00000000 --- a/cuda/psb_d_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_d_csrg_mat_mod - - use iso_c_binding - use psb_d_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_d_csr_sparse_mat) :: psb_d_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(d_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => d_csrg_get_fmt - procedure, pass(a) :: sizeof => d_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_d_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_csrg_csmm - procedure, pass(a) :: csmv => psb_d_csrg_csmv - procedure, pass(a) :: scals => psb_d_csrg_scals - procedure, pass(a) :: scalv => psb_d_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_d_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_csrg_from_fmt - procedure, pass(a) :: free => d_csrg_free - procedure, pass(a) :: mold => psb_d_csrg_mold - procedure, pass(a) :: is_host => d_csrg_is_host - procedure, pass(a) :: is_dev => d_csrg_is_dev - procedure, pass(a) :: is_sync => d_csrg_is_sync - procedure, pass(a) :: set_host => d_csrg_set_host - procedure, pass(a) :: set_dev => d_csrg_set_dev - procedure, pass(a) :: set_sync => d_csrg_set_sync - procedure, pass(a) :: sync => d_csrg_sync - procedure, pass(a) :: to_gpu => psb_d_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_d_csrg_from_gpu - final :: d_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_csrg_mold -#endif - end type psb_d_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: d_csrg_get_nzeros, d_csrg_free, d_csrg_get_fmt, & - & d_csrg_get_size, d_csrg_sizeof, d_csrg_get_nz_row - - - interface - subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_vect_mv - end interface - - interface - subroutine psb_d_csrg_reallocate_nz(nz,a) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_d_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_d_csrg_reallocate_nz - end interface - - interface - subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_d_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_csrg_allocate_mnnz - end interface - - interface - subroutine psb_d_csrg_mold(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_mold - end interface - - interface - subroutine psb_d_csrg_to_gpu(a,info, nzrm) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_csrg_to_gpu - end interface - - interface - subroutine psb_d_csrg_from_gpu(a,info) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_from_gpu - end interface - - interface - subroutine psb_d_cp_csrg_from_coo(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_csrg_from_coo - end interface - - interface - subroutine psb_d_cp_csrg_from_fmt(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_csrg_from_fmt - end interface - - interface - subroutine psb_d_mv_csrg_from_coo(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_csrg_from_coo - end interface - - interface - subroutine psb_d_mv_csrg_from_fmt(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_csrg_from_fmt - end interface - - interface - subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_csmv - end interface - interface - subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_csmm - end interface - - interface - subroutine psb_d_csrg_scal(d,a,info,side) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_d_csrg_scal - end interface - - interface - subroutine psb_d_csrg_scals(d,a,info) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function d_csrg_sizeof(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + psb_sizeof_dp * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function d_csrg_sizeof - - function d_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function d_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine d_csrg_set_host(a) - implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine d_csrg_set_host - - subroutine d_csrg_set_dev(a) - implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine d_csrg_set_dev - - subroutine d_csrg_set_sync(a) - implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine d_csrg_set_sync - - function d_csrg_is_dev(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function d_csrg_is_dev - - function d_csrg_is_host(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function d_csrg_is_host - - function d_csrg_is_sync(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function d_csrg_is_sync - - - subroutine d_csrg_sync(a) - implicit none - class(psb_d_csrg_sparse_mat), target, intent(in) :: a - class(psb_d_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine d_csrg_sync - - subroutine d_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_d_csr_sparse_mat%free() - - return - - end subroutine d_csrg_free - - subroutine d_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_d_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine d_csrg_finalize - -#else - interface - subroutine psb_d_csrg_mold(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_mold - end interface - -#endif - -end module psb_d_csrg_mat_mod diff --git a/cuda/psb_d_cuda_csrg_mat_mod.F90 b/cuda/psb_d_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..465c16a7 --- /dev/null +++ b/cuda/psb_d_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_csrg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(d_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_d_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_d_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => d_cuda_csrg_free + procedure, pass(a) :: mold => psb_d_cuda_csrg_mold + procedure, pass(a) :: is_host => d_cuda_csrg_is_host + procedure, pass(a) :: is_dev => d_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => d_cuda_csrg_is_sync + procedure, pass(a) :: set_host => d_cuda_csrg_set_host + procedure, pass(a) :: set_dev => d_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => d_cuda_csrg_set_sync + procedure, pass(a) :: sync => d_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_d_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_d_cuda_csrg_from_gpu + final :: d_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_cuda_csrg_mold +#endif + end type psb_d_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_cuda_csrg_get_nzeros, d_cuda_csrg_free, d_cuda_csrg_get_fmt, & + & d_cuda_csrg_get_size, d_cuda_csrg_sizeof, d_cuda_csrg_get_nz_row + + + interface + subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_d_cuda_csrg_mold(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_mold + end interface + + interface + subroutine psb_d_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_d_cuda_csrg_from_gpu(a,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_csmv + end interface + interface + subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_csmm + end interface + + interface + subroutine psb_d_cuda_csrg_scal(d,a,info,side) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_cuda_csrg_scal + end interface + + interface + subroutine psb_d_cuda_csrg_scals(d,a,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_cuda_csrg_sizeof + + function d_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function d_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine d_cuda_csrg_set_host(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_cuda_csrg_set_host + + subroutine d_cuda_csrg_set_dev(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_cuda_csrg_set_dev + + subroutine d_cuda_csrg_set_sync(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_cuda_csrg_set_sync + + function d_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_cuda_csrg_is_dev + + function d_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_cuda_csrg_is_host + + function d_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_cuda_csrg_is_sync + + + subroutine d_cuda_csrg_sync(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_cuda_csrg_sync + + subroutine d_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_cuda_csrg_free + + subroutine d_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine d_cuda_csrg_finalize + +#else + interface + subroutine psb_d_cuda_csrg_mold(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_mold + end interface + +#endif + +end module psb_d_cuda_csrg_mat_mod diff --git a/cuda/psb_d_diag_mat_mod.F90 b/cuda/psb_d_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_d_diag_mat_mod.F90 rename to cuda/psb_d_cuda_diag_mat_mod.F90 index 564f7a13..1d55faa0 100644 --- a/cuda/psb_d_diag_mat_mod.F90 +++ b/cuda/psb_d_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_d_diag_mat_mod +module psb_d_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_d_dia_mat_mod - type, extends(psb_d_dia_sparse_mat) :: psb_d_diag_sparse_mat + type, extends(psb_d_dia_sparse_mat) :: psb_d_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_d_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => d_diag_get_fmt - procedure, pass(a) :: sizeof => d_diag_sizeof - procedure, pass(a) :: vect_mv => psb_d_diag_vect_mv -! procedure, pass(a) :: csmm => psb_d_diag_csmm - procedure, pass(a) :: csmv => psb_d_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_d_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_d_diag_scals -! procedure, pass(a) :: scalv => psb_d_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_d_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_d_diag_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => d_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_d_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_d_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_d_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_d_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_d_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_d_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_d_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_d_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_d_mv_diag_from_fmt - procedure, pass(a) :: free => d_diag_free - procedure, pass(a) :: mold => psb_d_diag_mold - procedure, pass(a) :: to_gpu => psb_d_diag_to_gpu - final :: d_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => d_cuda_diag_free + procedure, pass(a) :: mold => psb_d_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_diag_to_gpu + final :: d_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_d_diag_mold + procedure, pass(a) :: mold => psb_d_cuda_diag_mold #endif - end type psb_d_diag_sparse_mat + end type psb_d_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: d_diag_get_nzeros, d_diag_free, d_diag_get_fmt, & - & d_diag_get_size, d_diag_sizeof, d_diag_get_nz_row + private :: d_cuda_diag_get_nzeros, d_cuda_diag_free, d_cuda_diag_get_fmt, & + & d_cuda_diag_get_size, d_cuda_diag_sizeof, d_cuda_diag_get_nz_row interface - subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_vect_mv + end subroutine psb_d_cuda_diag_vect_mv end interface interface - subroutine psb_d_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_inner_vect_sv + end subroutine psb_d_cuda_diag_inner_vect_sv end interface interface - subroutine psb_d_diag_reallocate_nz(nz,a) - import :: psb_d_diag_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_diag_reallocate_nz(nz,a) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_diag_sparse_mat), intent(inout) :: a - end subroutine psb_d_diag_reallocate_nz + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_diag_reallocate_nz end interface interface - subroutine psb_d_diag_allocate_mnnz(m,n,a,nz) - import :: psb_d_diag_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_diag_allocate_mnnz + end subroutine psb_d_cuda_diag_allocate_mnnz end interface interface - subroutine psb_d_diag_mold(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_mold(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_diag_mold + end subroutine psb_d_cuda_diag_mold end interface interface - subroutine psb_d_diag_to_gpu(a,info, nzrm) - import :: psb_d_diag_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_diag_to_gpu + end subroutine psb_d_cuda_diag_to_gpu end interface interface - subroutine psb_d_cp_diag_from_coo(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_diag_from_coo + end subroutine psb_d_cuda_cp_diag_from_coo end interface interface - subroutine psb_d_cp_diag_from_fmt(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_diag_from_fmt + end subroutine psb_d_cuda_cp_diag_from_fmt end interface interface - subroutine psb_d_mv_diag_from_coo(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_diag_from_coo + end subroutine psb_d_cuda_mv_diag_from_coo end interface interface - subroutine psb_d_mv_diag_from_fmt(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_diag_from_fmt + end subroutine psb_d_cuda_mv_diag_from_fmt end interface interface - subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_csmv + end subroutine psb_d_cuda_diag_csmv end interface interface - subroutine psb_d_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_csmm + end subroutine psb_d_cuda_diag_csmm end interface interface - subroutine psb_d_diag_scal(d,a,info, side) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_diag_scal(d,a,info, side) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_diag_scal + end subroutine psb_d_cuda_diag_scal end interface interface - subroutine psb_d_diag_scals(d,a,info) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_diag_scals(d,a,info) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_diag_scals + end subroutine psb_d_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function d_diag_sizeof(a) result(res) + function d_cuda_diag_sizeof(a) result(res) implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_diag_sizeof + end function d_cuda_diag_sizeof - function d_diag_get_fmt() result(res) + function d_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function d_diag_get_fmt + end function d_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine d_diag_free(a) + subroutine d_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine d_diag_free + end subroutine d_cuda_diag_free - subroutine d_diag_finalize(a) + subroutine d_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_d_diag_sparse_mat), intent(inout) :: a + type(psb_d_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_diag_finalize + end subroutine d_cuda_diag_finalize #else interface - subroutine psb_d_diag_mold(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_mold(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_diag_mold + end subroutine psb_d_cuda_diag_mold end interface #endif -end module psb_d_diag_mat_mod +end module psb_d_cuda_diag_mat_mod diff --git a/cuda/psb_d_dnsg_mat_mod.F90 b/cuda/psb_d_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_d_dnsg_mat_mod.F90 rename to cuda/psb_d_cuda_dnsg_mat_mod.F90 index 966c2311..bb24eb1a 100644 --- a/cuda/psb_d_dnsg_mat_mod.F90 +++ b/cuda/psb_d_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_d_dnsg_mat_mod +module psb_d_cuda_dnsg_mat_mod use iso_c_binding use psb_d_mat_mod use psb_d_dns_mat_mod use dnsdev_mod - type, extends(psb_d_dns_sparse_mat) :: psb_d_dnsg_sparse_mat + type, extends(psb_d_dns_sparse_mat) :: psb_d_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_d_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => d_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => d_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_d_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_d_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_d_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_d_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_d_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_d_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_d_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_d_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => d_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_d_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_d_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_d_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_d_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_d_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_d_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_d_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_d_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_dnsg_from_fmt - procedure, pass(a) :: free => d_dnsg_free - procedure, pass(a) :: mold => psb_d_dnsg_mold - procedure, pass(a) :: to_gpu => psb_d_dnsg_to_gpu - final :: d_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => d_cuda_dnsg_free + procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_dnsg_to_gpu + final :: d_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_d_dnsg_mold + procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold #endif - end type psb_d_dnsg_sparse_mat + end type psb_d_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: d_dnsg_get_nzeros, d_dnsg_free, d_dnsg_get_fmt, & - & d_dnsg_get_size, d_dnsg_get_nz_row + private :: d_cuda_dnsg_get_nzeros, d_cuda_dnsg_free, d_cuda_dnsg_get_fmt, & + & d_cuda_dnsg_get_size, d_cuda_dnsg_get_nz_row interface - subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_dnsg_vect_mv + end subroutine psb_d_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_d_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end subroutine psb_d_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_d_dnsg_mold(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_dnsg_mold(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_dnsg_mold + end subroutine psb_d_cuda_dnsg_mold end interface interface - subroutine psb_d_dnsg_to_gpu(a,info) - import :: psb_d_dnsg_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_dnsg_to_gpu(a,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_dnsg_to_gpu + end subroutine psb_d_cuda_dnsg_to_gpu end interface interface - subroutine psb_d_cp_dnsg_from_coo(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_dnsg_from_coo + end subroutine psb_d_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_d_cp_dnsg_from_fmt(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_dnsg_from_fmt + end subroutine psb_d_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_d_mv_dnsg_from_coo(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_dnsg_from_coo + end subroutine psb_d_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_d_mv_dnsg_from_fmt(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_dnsg_from_fmt + end subroutine psb_d_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_d_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) !!$ real(psb_dpk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_csmv +!!$ end subroutine psb_d_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_d_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_csmm +!!$ end subroutine psb_d_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_scal(d,a,info, side) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_d_dnsg_scal +!!$ end subroutine psb_d_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_scals(d,a,info) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_dnsg_scals +!!$ end subroutine psb_d_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function d_dnsg_get_fmt() result(res) + function d_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function d_dnsg_get_fmt + end function d_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine d_dnsg_free(a) + subroutine d_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine d_dnsg_free + end subroutine d_cuda_dnsg_free - subroutine d_dnsg_finalize(a) + subroutine d_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_d_dnsg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_dnsg_finalize + end subroutine d_cuda_dnsg_finalize #else interface - subroutine psb_d_dnsg_mold(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_dnsg_mold(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_dnsg_mold + end subroutine psb_d_cuda_dnsg_mold end interface #endif -end module psb_d_dnsg_mat_mod +end module psb_d_cuda_dnsg_mat_mod diff --git a/cuda/psb_d_elg_mat_mod.F90 b/cuda/psb_d_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_d_elg_mat_mod.F90 rename to cuda/psb_d_cuda_elg_mat_mod.F90 index eac7bb36..1ac47664 100644 --- a/cuda/psb_d_elg_mat_mod.F90 +++ b/cuda/psb_d_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_d_elg_mat_mod +module psb_d_cuda_elg_mat_mod use iso_c_binding use psb_d_mat_mod use psb_d_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_ell_sparse_mat) :: psb_d_elg_sparse_mat + type, extends(psb_d_ell_sparse_mat) :: psb_d_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_d_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => d_elg_get_fmt - procedure, pass(a) :: sizeof => d_elg_sizeof - procedure, pass(a) :: vect_mv => psb_d_elg_vect_mv - procedure, pass(a) :: csmm => psb_d_elg_csmm - procedure, pass(a) :: csmv => psb_d_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_d_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_d_elg_scals - procedure, pass(a) :: scalv => psb_d_elg_scal - procedure, pass(a) :: reallocate_nz => psb_d_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_elg_allocate_mnnz - procedure, pass(a) :: reinit => d_elg_reinit + procedure, nopass :: get_fmt => d_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_d_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_cuda_elg_scals + procedure, pass(a) :: scalv => psb_d_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => d_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_elg_from_fmt - procedure, pass(a) :: free => d_elg_free - procedure, pass(a) :: mold => psb_d_elg_mold - procedure, pass(a) :: csput_a => psb_d_elg_csput_a - procedure, pass(a) :: csput_v => psb_d_elg_csput_v - procedure, pass(a) :: is_host => d_elg_is_host - procedure, pass(a) :: is_dev => d_elg_is_dev - procedure, pass(a) :: is_sync => d_elg_is_sync - procedure, pass(a) :: set_host => d_elg_set_host - procedure, pass(a) :: set_dev => d_elg_set_dev - procedure, pass(a) :: set_sync => d_elg_set_sync - procedure, pass(a) :: sync => d_elg_sync - procedure, pass(a) :: from_gpu => psb_d_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_d_elg_to_gpu - procedure, pass(a) :: asb => psb_d_elg_asb - final :: d_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => d_cuda_elg_free + procedure, pass(a) :: mold => psb_d_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_d_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_d_cuda_elg_csput_v + procedure, pass(a) :: is_host => d_cuda_elg_is_host + procedure, pass(a) :: is_dev => d_cuda_elg_is_dev + procedure, pass(a) :: is_sync => d_cuda_elg_is_sync + procedure, pass(a) :: set_host => d_cuda_elg_set_host + procedure, pass(a) :: set_dev => d_cuda_elg_set_dev + procedure, pass(a) :: set_sync => d_cuda_elg_set_sync + procedure, pass(a) :: sync => d_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_d_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_d_cuda_elg_asb + final :: d_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_d_elg_mold - procedure, pass(a) :: asb => psb_d_elg_asb + procedure, pass(a) :: mold => psb_d_cuda_elg_mold + procedure, pass(a) :: asb => psb_d_cuda_elg_asb #endif - end type psb_d_elg_sparse_mat + end type psb_d_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: d_elg_get_nzeros, d_elg_free, d_elg_get_fmt, & - & d_elg_get_size, d_elg_sizeof, d_elg_get_nz_row, d_elg_sync + private :: d_cuda_elg_get_nzeros, d_cuda_elg_free, d_cuda_elg_get_fmt, & + & d_cuda_elg_get_size, d_cuda_elg_sizeof, d_cuda_elg_get_nz_row, d_cuda_elg_sync interface - subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_vect_mv + end subroutine psb_d_cuda_elg_vect_mv end interface interface - subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_inner_vect_sv + end subroutine psb_d_cuda_elg_inner_vect_sv end interface interface - subroutine psb_d_elg_reallocate_nz(nz,a) - import :: psb_d_elg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_elg_reallocate_nz(nz,a) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_elg_reallocate_nz + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_reallocate_nz end interface interface - subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) - import :: psb_d_elg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_elg_allocate_mnnz + end subroutine psb_d_cuda_elg_allocate_mnnz end interface interface - subroutine psb_d_elg_mold(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_mold(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_mold + end subroutine psb_d_cuda_elg_mold end interface interface - subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_csput_a + end subroutine psb_d_cuda_elg_csput_a end interface interface - subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,& + subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,& & psb_i_base_vect_type - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_csput_v + end subroutine psb_d_cuda_elg_csput_v end interface interface - subroutine psb_d_elg_from_gpu(a,info) - import :: psb_d_elg_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_from_gpu(a,info) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_from_gpu + end subroutine psb_d_cuda_elg_from_gpu end interface interface - subroutine psb_d_elg_to_gpu(a,info, nzrm) - import :: psb_d_elg_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_elg_to_gpu + end subroutine psb_d_cuda_elg_to_gpu end interface interface - subroutine psb_d_cp_elg_from_coo(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_elg_from_coo + end subroutine psb_d_cuda_cp_elg_from_coo end interface interface - subroutine psb_d_cp_elg_from_fmt(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_elg_from_fmt + end subroutine psb_d_cuda_cp_elg_from_fmt end interface interface - subroutine psb_d_mv_elg_from_coo(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_elg_from_coo + end subroutine psb_d_cuda_mv_elg_from_coo end interface interface - subroutine psb_d_mv_elg_from_fmt(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_elg_from_fmt + end subroutine psb_d_cuda_mv_elg_from_fmt end interface interface - subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_csmv + end subroutine psb_d_cuda_elg_csmv end interface interface - subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_csmm + end subroutine psb_d_cuda_elg_csmm end interface interface - subroutine psb_d_elg_scal(d,a,info, side) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_scal(d,a,info, side) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_elg_scal + end subroutine psb_d_cuda_elg_scal end interface interface - subroutine psb_d_elg_scals(d,a,info) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_scals(d,a,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_scals + end subroutine psb_d_cuda_elg_scals end interface interface - subroutine psb_d_elg_asb(a) - import :: psb_d_elg_sparse_mat - class(psb_d_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_elg_asb + subroutine psb_d_cuda_elg_asb(a) + import :: psb_d_cuda_elg_sparse_mat + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function d_elg_sizeof(a) result(res) + function d_cuda_elg_sizeof(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_elg_sizeof + end function d_cuda_elg_sizeof - function d_elg_get_fmt() result(res) + function d_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function d_elg_get_fmt + end function d_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine d_elg_reinit(a,clear) + subroutine d_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine d_elg_reinit + end subroutine d_cuda_elg_reinit - subroutine d_elg_free(a) + subroutine d_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine d_elg_free + end subroutine d_cuda_elg_free - subroutine d_elg_sync(a) + subroutine d_cuda_elg_sync(a) implicit none - class(psb_d_elg_sparse_mat), target, intent(in) :: a - class(psb_d_elg_sparse_mat), pointer :: tmpa + class(psb_d_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine d_elg_sync + end subroutine d_cuda_elg_sync - subroutine d_elg_set_host(a) + subroutine d_cuda_elg_set_host(a) implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine d_elg_set_host + end subroutine d_cuda_elg_set_host - subroutine d_elg_set_dev(a) + subroutine d_cuda_elg_set_dev(a) implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine d_elg_set_dev + end subroutine d_cuda_elg_set_dev - subroutine d_elg_set_sync(a) + subroutine d_cuda_elg_set_sync(a) implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine d_elg_set_sync + end subroutine d_cuda_elg_set_sync - function d_elg_is_dev(a) result(res) + function d_cuda_elg_is_dev(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function d_elg_is_dev + end function d_cuda_elg_is_dev - function d_elg_is_host(a) result(res) + function d_cuda_elg_is_host(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function d_elg_is_host + end function d_cuda_elg_is_host - function d_elg_is_sync(a) result(res) + function d_cuda_elg_is_sync(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function d_elg_is_sync + end function d_cuda_elg_is_sync - subroutine d_elg_finalize(a) + subroutine d_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_d_elg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_elg_finalize + end subroutine d_cuda_elg_finalize #else interface - subroutine psb_d_elg_asb(a) - import :: psb_d_elg_sparse_mat - class(psb_d_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_elg_asb + subroutine psb_d_cuda_elg_asb(a) + import :: psb_d_cuda_elg_sparse_mat + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_asb end interface interface - subroutine psb_d_elg_mold(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_mold(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_mold + end subroutine psb_d_cuda_elg_mold end interface #endif -end module psb_d_elg_mat_mod +end module psb_d_cuda_elg_mat_mod diff --git a/cuda/psb_d_hdiag_mat_mod.F90 b/cuda/psb_d_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_d_hdiag_mat_mod.F90 rename to cuda/psb_d_cuda_hdiag_mat_mod.F90 index 1bc70c8c..17bacffe 100644 --- a/cuda/psb_d_hdiag_mat_mod.F90 +++ b/cuda/psb_d_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_d_hdiag_mat_mod +module psb_d_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_d_hdia_mat_mod - type, extends(psb_d_hdia_sparse_mat) :: psb_d_hdiag_sparse_mat + type, extends(psb_d_hdia_sparse_mat) :: psb_d_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => d_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => d_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_d_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_d_hdiag_csmm - procedure, pass(a) :: csmv => psb_d_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_d_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_d_hdiag_scals - ! procedure, pass(a) :: scalv => psb_d_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_d_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_d_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => d_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_d_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_d_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_d_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_d_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_d_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_hdiag_from_fmt - procedure, pass(a) :: free => d_hdiag_free - procedure, pass(a) :: mold => psb_d_hdiag_mold - procedure, pass(a) :: to_gpu => psb_d_hdiag_to_gpu - final :: d_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => d_cuda_hdiag_free + procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_hdiag_to_gpu + final :: d_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_d_hdiag_mold + procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold #endif - end type psb_d_hdiag_sparse_mat + end type psb_d_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: d_hdiag_get_nzeros, d_hdiag_free, d_hdiag_get_fmt, & - & d_hdiag_get_size, d_hdiag_sizeof, d_hdiag_get_nz_row + private :: d_cuda_hdiag_get_nzeros, d_cuda_hdiag_free, d_cuda_hdiag_get_fmt, & + & d_cuda_hdiag_get_size, d_cuda_hdiag_sizeof, d_cuda_hdiag_get_nz_row interface - subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hdiag_vect_mv + end subroutine psb_d_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_d_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type -!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_d_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_hdiag_inner_vect_sv +!!$ end subroutine psb_d_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_d_hdiag_reallocate_nz +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_d_hdiag_allocate_mnnz +!!$ end subroutine psb_d_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_d_hdiag_mold(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_mold(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hdiag_mold + end subroutine psb_d_cuda_hdiag_mold end interface interface - subroutine psb_d_hdiag_to_gpu(a,info) - import :: psb_d_hdiag_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hdiag_to_gpu(a,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hdiag_to_gpu + end subroutine psb_d_cuda_hdiag_to_gpu end interface interface - subroutine psb_d_cp_hdiag_from_coo(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hdiag_from_coo + end subroutine psb_d_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_d_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_d_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_cp_hdiag_from_fmt +!!$ end subroutine psb_d_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_d_mv_hdiag_from_coo(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hdiag_from_coo + end subroutine psb_d_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_d_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_d_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_mv_hdiag_from_fmt +!!$ end subroutine psb_d_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hdiag_csmv + end subroutine psb_d_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_d_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_hdiag_csmm +!!$ end subroutine psb_d_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_scal(d,a,info, side) -!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_d_hdiag_scal +!!$ end subroutine psb_d_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_scals(d,a,info) -!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_hdiag_scals +!!$ end subroutine psb_d_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function d_hdiag_get_fmt() result(res) + function d_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function d_hdiag_get_fmt + end function d_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine d_hdiag_free(a) + subroutine d_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine d_hdiag_free + end subroutine d_cuda_hdiag_free - subroutine d_hdiag_finalize(a) + subroutine d_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_d_hdiag_sparse_mat), intent(inout) :: a + type(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_d_hdia_sparse_mat%free() return - end subroutine d_hdiag_finalize + end subroutine d_cuda_hdiag_finalize #else interface - subroutine psb_d_hdiag_mold(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_mold(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hdiag_mold + end subroutine psb_d_cuda_hdiag_mold end interface #endif -end module psb_d_hdiag_mat_mod +end module psb_d_cuda_hdiag_mat_mod diff --git a/cuda/psb_d_hlg_mat_mod.F90 b/cuda/psb_d_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_d_hlg_mat_mod.F90 rename to cuda/psb_d_cuda_hlg_mat_mod.F90 index 756d13aa..19ecb62b 100644 --- a/cuda/psb_d_hlg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_d_hlg_mat_mod +module psb_d_cuda_hlg_mat_mod use iso_c_binding use psb_d_mat_mod @@ -41,7 +41,7 @@ module psb_d_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_hll_sparse_mat) :: psb_d_hlg_sparse_mat + type, extends(psb_d_hll_sparse_mat) :: psb_d_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_d_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => d_hlg_get_fmt - procedure, pass(a) :: sizeof => d_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_d_hlg_vect_mv - procedure, pass(a) :: csmm => psb_d_hlg_csmm - procedure, pass(a) :: csmv => psb_d_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_d_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_d_hlg_scals - procedure, pass(a) :: scalv => psb_d_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_d_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_hlg_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_d_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_d_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_hlg_from_fmt - procedure, pass(a) :: free => d_hlg_free - procedure, pass(a) :: mold => psb_d_hlg_mold - procedure, pass(a) :: is_host => d_hlg_is_host - procedure, pass(a) :: is_dev => d_hlg_is_dev - procedure, pass(a) :: is_sync => d_hlg_is_sync - procedure, pass(a) :: set_host => d_hlg_set_host - procedure, pass(a) :: set_dev => d_hlg_set_dev - procedure, pass(a) :: set_sync => d_hlg_set_sync - procedure, pass(a) :: sync => d_hlg_sync - procedure, pass(a) :: from_gpu => psb_d_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_d_hlg_to_gpu - final :: d_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => d_cuda_hlg_free + procedure, pass(a) :: mold => psb_d_cuda_hlg_mold + procedure, pass(a) :: is_host => d_cuda_hlg_is_host + procedure, pass(a) :: is_dev => d_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => d_cuda_hlg_is_sync + procedure, pass(a) :: set_host => d_cuda_hlg_set_host + procedure, pass(a) :: set_dev => d_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => d_cuda_hlg_set_sync + procedure, pass(a) :: sync => d_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_d_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_cuda_hlg_to_gpu + final :: d_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_d_hlg_mold + procedure, pass(a) :: mold => psb_d_cuda_hlg_mold #endif - end type psb_d_hlg_sparse_mat + end type psb_d_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: d_hlg_get_nzeros, d_hlg_free, d_hlg_get_fmt, & - & d_hlg_get_size, d_hlg_sizeof, d_hlg_get_nz_row + private :: d_cuda_hlg_get_nzeros, d_cuda_hlg_free, d_cuda_hlg_get_fmt, & + & d_cuda_hlg_get_size, d_cuda_hlg_sizeof, d_cuda_hlg_get_nz_row interface - subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_vect_mv + end subroutine psb_d_cuda_hlg_vect_mv end interface interface - subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_inner_vect_sv + end subroutine psb_d_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_d_hlg_reallocate_nz(nz,a) - import :: psb_d_hlg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_d_hlg_reallocate_nz + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_hlg_reallocate_nz end interface interface - subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_d_hlg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_hlg_allocate_mnnz + end subroutine psb_d_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_d_hlg_mold(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_mold(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_mold + end subroutine psb_d_cuda_hlg_mold end interface interface - subroutine psb_d_hlg_from_gpu(a,info) - import :: psb_d_hlg_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_from_gpu(a,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_from_gpu + end subroutine psb_d_cuda_hlg_from_gpu end interface interface - subroutine psb_d_hlg_to_gpu(a,info, nzrm) - import :: psb_d_hlg_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_hlg_to_gpu + end subroutine psb_d_cuda_hlg_to_gpu end interface interface - subroutine psb_d_cp_hlg_from_coo(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hlg_from_coo + end subroutine psb_d_cuda_cp_hlg_from_coo end interface interface - subroutine psb_d_cp_hlg_from_fmt(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hlg_from_fmt + end subroutine psb_d_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_d_mv_hlg_from_coo(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hlg_from_coo + end subroutine psb_d_cuda_mv_hlg_from_coo end interface interface - subroutine psb_d_mv_hlg_from_fmt(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hlg_from_fmt + end subroutine psb_d_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_csmv + end subroutine psb_d_cuda_hlg_csmv end interface interface - subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_csmm + end subroutine psb_d_cuda_hlg_csmm end interface interface - subroutine psb_d_hlg_scal(d,a,info, side) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_scal(d,a,info, side) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_hlg_scal + end subroutine psb_d_cuda_hlg_scal end interface interface - subroutine psb_d_hlg_scals(d,a,info) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_scals(d,a,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_scals + end subroutine psb_d_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function d_hlg_sizeof(a) result(res) + function d_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_hlg_sizeof + end function d_cuda_hlg_sizeof - function d_hlg_get_fmt() result(res) + function d_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function d_hlg_get_fmt + end function d_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine d_hlg_free(a) + subroutine d_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine d_hlg_free + end subroutine d_cuda_hlg_free - subroutine d_hlg_sync(a) + subroutine d_cuda_hlg_sync(a) implicit none - class(psb_d_hlg_sparse_mat), target, intent(in) :: a - class(psb_d_hlg_sparse_mat), pointer :: tmpa + class(psb_d_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine d_hlg_sync + end subroutine d_cuda_hlg_sync - subroutine d_hlg_set_host(a) + subroutine d_cuda_hlg_set_host(a) implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine d_hlg_set_host + end subroutine d_cuda_hlg_set_host - subroutine d_hlg_set_dev(a) + subroutine d_cuda_hlg_set_dev(a) implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine d_hlg_set_dev + end subroutine d_cuda_hlg_set_dev - subroutine d_hlg_set_sync(a) + subroutine d_cuda_hlg_set_sync(a) implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine d_hlg_set_sync + end subroutine d_cuda_hlg_set_sync - function d_hlg_is_dev(a) result(res) + function d_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function d_hlg_is_dev + end function d_cuda_hlg_is_dev - function d_hlg_is_host(a) result(res) + function d_cuda_hlg_is_host(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function d_hlg_is_host + end function d_cuda_hlg_is_host - function d_hlg_is_sync(a) result(res) + function d_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function d_hlg_is_sync + end function d_cuda_hlg_is_sync - subroutine d_hlg_finalize(a) + subroutine d_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_d_hlg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_hlg_finalize + end subroutine d_cuda_hlg_finalize #else interface - subroutine psb_d_hlg_mold(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_mold(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_mold + end subroutine psb_d_cuda_hlg_mold end interface #endif -end module psb_d_hlg_mat_mod +end module psb_d_cuda_hlg_mat_mod diff --git a/cuda/psb_d_hybg_mat_mod.F90 b/cuda/psb_d_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_d_hybg_mat_mod.F90 rename to cuda/psb_d_cuda_hybg_mat_mod.F90 index d764daa7..be4c8392 100644 --- a/cuda/psb_d_hybg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_d_hybg_mat_mod +module psb_d_cuda_hybg_mat_mod use iso_c_binding use psb_d_mat_mod use cusparse_mod - type, extends(psb_d_csr_sparse_mat) :: psb_d_hybg_sparse_mat + type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_d_hybg_mat_mod type(d_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => d_hybg_get_fmt - procedure, pass(a) :: sizeof => d_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_d_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_hybg_csmm - procedure, pass(a) :: csmv => psb_d_hybg_csmv - procedure, pass(a) :: scals => psb_d_hybg_scals - procedure, pass(a) :: scalv => psb_d_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_d_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_hybg_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_d_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_d_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_hybg_from_fmt - procedure, pass(a) :: free => d_hybg_free - procedure, pass(a) :: mold => psb_d_hybg_mold - procedure, pass(a) :: to_gpu => psb_d_hybg_to_gpu - final :: d_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => d_cuda_hybg_free + procedure, pass(a) :: mold => psb_d_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_hybg_to_gpu + final :: d_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_d_hybg_mold + procedure, pass(a) :: mold => psb_d_cuda_hybg_mold #endif - end type psb_d_hybg_sparse_mat + end type psb_d_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: d_hybg_get_nzeros, d_hybg_free, d_hybg_get_fmt, & - & d_hybg_get_size, d_hybg_sizeof, d_hybg_get_nz_row + private :: d_cuda_hybg_get_nzeros, d_cuda_hybg_free, d_cuda_hybg_get_fmt, & + & d_cuda_hybg_get_size, d_cuda_hybg_sizeof, d_cuda_hybg_get_nz_row interface - subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_inner_vect_sv + end subroutine psb_d_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_vect_mv + end subroutine psb_d_cuda_hybg_vect_mv end interface interface - subroutine psb_d_hybg_reallocate_nz(nz,a) - import :: psb_d_hybg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_d_hybg_reallocate_nz + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_hybg_reallocate_nz end interface interface - subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_d_hybg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_hybg_allocate_mnnz + end subroutine psb_d_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_d_hybg_mold(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_mold(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hybg_mold + end subroutine psb_d_cuda_hybg_mold end interface interface - subroutine psb_d_hybg_to_gpu(a,info, nzrm) - import :: psb_d_hybg_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_hybg_to_gpu + end subroutine psb_d_cuda_hybg_to_gpu end interface interface - subroutine psb_d_cp_hybg_from_coo(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hybg_from_coo + end subroutine psb_d_cuda_cp_hybg_from_coo end interface interface - subroutine psb_d_cp_hybg_from_fmt(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hybg_from_fmt + end subroutine psb_d_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_d_mv_hybg_from_coo(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hybg_from_coo + end subroutine psb_d_cuda_mv_hybg_from_coo end interface interface - subroutine psb_d_mv_hybg_from_fmt(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hybg_from_fmt + end subroutine psb_d_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_csmv + end subroutine psb_d_cuda_hybg_csmv end interface interface - subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_csmm + end subroutine psb_d_cuda_hybg_csmm end interface interface - subroutine psb_d_hybg_scal(d,a,info,side) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hybg_scal(d,a,info,side) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_hybg_scal + end subroutine psb_d_cuda_hybg_scal end interface interface - subroutine psb_d_hybg_scals(d,a,info) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hybg_scals(d,a,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hybg_scals + end subroutine psb_d_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function d_hybg_sizeof(a) result(res) + function d_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + psb_sizeof_dp * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_hybg_sizeof + end function d_cuda_hybg_sizeof - function d_hybg_get_fmt() result(res) + function d_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function d_hybg_get_fmt + end function d_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine d_hybg_free(a) + subroutine d_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_d_csr_sparse_mat%free() return - end subroutine d_hybg_free + end subroutine d_cuda_hybg_free - subroutine d_hybg_finalize(a) + subroutine d_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_d_hybg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine d_hybg_finalize + end subroutine d_cuda_hybg_finalize #else interface - subroutine psb_d_hybg_mold(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_mold(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hybg_mold + end subroutine psb_d_cuda_hybg_mold end interface #endif -end module psb_d_hybg_mat_mod +end module psb_d_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_d_gpu_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_d_gpu_vect_mod.F90 rename to cuda/psb_d_cuda_vect_mod.F90 index cd3757c3..83ec108b 100644 --- a/cuda/psb_d_gpu_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_d_gpu_vect_mod +module psb_d_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_d_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_d_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_d_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_base_vect_type) :: psb_d_vect_gpu + type, extends(psb_d_base_vect_type) :: psb_d_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_d_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => d_gpu_get_nrows - procedure, nopass :: get_fmt => d_gpu_get_fmt - - procedure, pass(x) :: all => d_gpu_all - procedure, pass(x) :: zero => d_gpu_zero - procedure, pass(x) :: asb_m => d_gpu_asb_m - procedure, pass(x) :: sync => d_gpu_sync - procedure, pass(x) :: sync_space => d_gpu_sync_space - procedure, pass(x) :: bld_x => d_gpu_bld_x - procedure, pass(x) :: bld_mn => d_gpu_bld_mn - procedure, pass(x) :: free => d_gpu_free - procedure, pass(x) :: ins_a => d_gpu_ins_a - procedure, pass(x) :: ins_v => d_gpu_ins_v - procedure, pass(x) :: is_host => d_gpu_is_host - procedure, pass(x) :: is_dev => d_gpu_is_dev - procedure, pass(x) :: is_sync => d_gpu_is_sync - procedure, pass(x) :: set_host => d_gpu_set_host - procedure, pass(x) :: set_dev => d_gpu_set_dev - procedure, pass(x) :: set_sync => d_gpu_set_sync - procedure, pass(x) :: set_scal => d_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => d_gpu_set_vect - procedure, pass(x) :: gthzv_x => d_gpu_gthzv_x - procedure, pass(y) :: sctb => d_gpu_sctb - procedure, pass(y) :: sctb_x => d_gpu_sctb_x - procedure, pass(x) :: gthzbuf => d_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => d_gpu_sctb_buf - procedure, pass(x) :: new_buffer => d_gpu_new_buffer - procedure, nopass :: device_wait => d_gpu_device_wait - procedure, pass(x) :: free_buffer => d_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => d_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => d_gpu_dot_v - procedure, pass(x) :: dot_a => d_gpu_dot_a - procedure, pass(y) :: axpby_v => d_gpu_axpby_v - procedure, pass(y) :: axpby_a => d_gpu_axpby_a - procedure, pass(y) :: mlt_v => d_gpu_mlt_v - procedure, pass(y) :: mlt_a => d_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => d_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => d_gpu_mlt_v_2 - procedure, pass(x) :: scal => d_gpu_scal - procedure, pass(x) :: nrm2 => d_gpu_nrm2 - procedure, pass(x) :: amax => d_gpu_amax - procedure, pass(x) :: asum => d_gpu_asum - procedure, pass(x) :: absval1 => d_gpu_absval1 - procedure, pass(x) :: absval2 => d_gpu_absval2 - - final :: d_gpu_vect_finalize + procedure, pass(x) :: get_nrows => d_cuda_get_nrows + procedure, nopass :: get_fmt => d_cuda_get_fmt + + procedure, pass(x) :: all => d_cuda_all + procedure, pass(x) :: zero => d_cuda_zero + procedure, pass(x) :: asb_m => d_cuda_asb_m + procedure, pass(x) :: sync => d_cuda_sync + procedure, pass(x) :: sync_space => d_cuda_sync_space + procedure, pass(x) :: bld_x => d_cuda_bld_x + procedure, pass(x) :: bld_mn => d_cuda_bld_mn + procedure, pass(x) :: free => d_cuda_free + procedure, pass(x) :: ins_a => d_cuda_ins_a + procedure, pass(x) :: ins_v => d_cuda_ins_v + procedure, pass(x) :: is_host => d_cuda_is_host + procedure, pass(x) :: is_dev => d_cuda_is_dev + procedure, pass(x) :: is_sync => d_cuda_is_sync + procedure, pass(x) :: set_host => d_cuda_set_host + procedure, pass(x) :: set_dev => d_cuda_set_dev + procedure, pass(x) :: set_sync => d_cuda_set_sync + procedure, pass(x) :: set_scal => d_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => d_cuda_set_vect + procedure, pass(x) :: gthzv_x => d_cuda_gthzv_x + procedure, pass(y) :: sctb => d_cuda_sctb + procedure, pass(y) :: sctb_x => d_cuda_sctb_x + procedure, pass(x) :: gthzbuf => d_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => d_cuda_sctb_buf + procedure, pass(x) :: new_buffer => d_cuda_new_buffer + procedure, nopass :: device_wait => d_cuda_device_wait + procedure, pass(x) :: free_buffer => d_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => d_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => d_cuda_dot_v + procedure, pass(x) :: dot_a => d_cuda_dot_a + procedure, pass(y) :: axpby_v => d_cuda_axpby_v + procedure, pass(y) :: axpby_a => d_cuda_axpby_a + procedure, pass(y) :: mlt_v => d_cuda_mlt_v + procedure, pass(y) :: mlt_a => d_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => d_cuda_mlt_v_2 + procedure, pass(x) :: scal => d_cuda_scal + procedure, pass(x) :: nrm2 => d_cuda_nrm2 + procedure, pass(x) :: amax => d_cuda_amax + procedure, pass(x) :: asum => d_cuda_asum + procedure, pass(x) :: absval1 => d_cuda_absval1 + procedure, pass(x) :: absval2 => d_cuda_absval2 + + final :: d_cuda_vect_finalize #endif - end type psb_d_vect_gpu + end type psb_d_vect_cuda - public :: psb_d_vect_gpu_ + public :: psb_d_vect_cuda_ private :: constructor - interface psb_d_vect_gpu_ + interface psb_d_vect_cuda_ module procedure constructor - end interface psb_d_vect_gpu_ + end interface psb_d_vect_cuda_ contains function constructor(x) result(this) real(psb_dpk_) :: x(:) - type(psb_d_vect_gpu) :: this + type(psb_d_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine d_gpu_device_wait() + subroutine d_cuda_device_wait() call psb_cudaSync() - end subroutine d_gpu_device_wait + end subroutine d_cuda_device_wait - subroutine d_gpu_new_buffer(n,x,info) + subroutine d_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine d_gpu_gthzv_x + end subroutine d_cuda_gthzv_x - subroutine d_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine d_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_d_vect_gpu) :: x + class(psb_d_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine d_gpu_gthzbuf + end subroutine d_cuda_gthzbuf - subroutine d_gpu_sctb(n,idx,x,beta,y) + subroutine d_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_gpu) :: y + class(psb_d_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_d_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine d_gpu_sctb + end subroutine d_cuda_sctb - subroutine d_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine d_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_gpu) :: y + class(psb_d_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine d_gpu_sctb_x + end subroutine d_cuda_sctb_x - subroutine d_gpu_sctb_buf(i,n,idx,beta,y) + subroutine d_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta - class(psb_d_vect_gpu) :: y + class(psb_d_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine d_gpu_sctb_buf + end subroutine d_cuda_sctb_buf - subroutine d_gpu_bld_x(x,this) + subroutine d_cuda_bld_x(x,this) use psb_base_mod real(psb_dpk_), intent(in) :: this(:) - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_bld_x',& + call psb_errpush(info,'d_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine d_gpu_bld_x + end subroutine d_cuda_bld_x - subroutine d_gpu_bld_mn(x,n) + subroutine d_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'d_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'d_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine d_gpu_bld_mn + end subroutine d_cuda_bld_mn - subroutine d_gpu_set_host(x) + subroutine d_cuda_set_host(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine d_gpu_set_host + end subroutine d_cuda_set_host - subroutine d_gpu_set_dev(x) + subroutine d_cuda_set_dev(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine d_gpu_set_dev + end subroutine d_cuda_set_dev - subroutine d_gpu_set_sync(x) + subroutine d_cuda_set_sync(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine d_gpu_set_sync + end subroutine d_cuda_set_sync - function d_gpu_is_dev(x) result(res) + function d_cuda_is_dev(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function d_gpu_is_dev + end function d_cuda_is_dev - function d_gpu_is_host(x) result(res) + function d_cuda_is_host(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function d_gpu_is_host + end function d_cuda_is_host - function d_gpu_is_sync(x) result(res) + function d_cuda_is_sync(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function d_gpu_is_sync + end function d_cuda_is_sync - function d_gpu_get_nrows(x) result(res) + function d_cuda_get_nrows(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function d_gpu_get_nrows + end function d_cuda_get_nrows - function d_gpu_get_fmt() result(res) + function d_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'dGPU' - end function d_gpu_get_fmt + end function d_cuda_get_fmt - subroutine d_gpu_all(n, x, info) + subroutine d_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_d_vect_gpu), intent(out) :: x + class(psb_d_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_all',& + call psb_errpush(info,'d_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine d_gpu_all + end subroutine d_cuda_all - subroutine d_gpu_zero(x) + subroutine d_cuda_zero(x) use psi_serial_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine d_gpu_zero + end subroutine d_cuda_zero - subroutine d_gpu_asb_m(n, x, info) + subroutine d_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine d_gpu_asb_m + end subroutine d_cuda_asb_m - subroutine d_gpu_sync_space(x,info) + subroutine d_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine d_gpu_sync_space + end subroutine d_cuda_sync_space - subroutine d_gpu_sync(x) + subroutine d_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'d_gpu_sync') + call psb_errpush(info,'d_cuda_sync') end if - end subroutine d_gpu_sync + end subroutine d_cuda_sync - subroutine d_gpu_free(x, info) + subroutine d_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine d_gpu_free + end subroutine d_cuda_free - subroutine d_gpu_set_scal(x,val,first,last) - class(psb_d_vect_gpu), intent(inout) :: x + subroutine d_cuda_set_scal(x,val,first,last) + class(psb_d_vect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine d_gpu_set_scal + end subroutine d_cuda_set_scal !!$ -!!$ subroutine d_gpu_set_vect(x,val) -!!$ class(psb_d_vect_gpu), intent(inout) :: x +!!$ subroutine d_cuda_set_vect(x,val) +!!$ class(psb_d_vect_cuda), intent(inout) :: x !!$ real(psb_dpk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_d_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine d_gpu_set_vect +!!$ end subroutine d_cuda_set_vect - function d_gpu_dot_v(n,x,y) result(res) + function d_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_d_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'d_gpu_dot_v') + call psb_errpush(info,'d_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function d_gpu_dot_v + end function d_cuda_dot_v - function d_gpu_dot_a(n,x,y) result(res) + function d_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function d_gpu_dot_a + end function d_cuda_dot_a - subroutine d_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine d_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y real(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) ! Do something different here if ((beta /= dzero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine d_gpu_axpby_v + end subroutine d_cuda_axpby_v - subroutine d_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine d_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y real(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine d_gpu_axpby_a + end subroutine d_cuda_axpby_a - subroutine d_gpu_mlt_v(x, y, info) + subroutine d_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine d_gpu_mlt_v + end subroutine d_cuda_mlt_v - subroutine d_gpu_mlt_a(x, y, info) + subroutine d_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_d_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine d_gpu_mlt_a + end subroutine d_cuda_mlt_a - subroutine d_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine d_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none real(psb_dpk_), intent(in) :: alpha,beta real(psb_dpk_), intent(in) :: x(:) real(psb_dpk_), intent(in) :: y(:) - class(psb_d_vect_gpu), intent(inout) :: z + class(psb_d_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine d_gpu_mlt_a_2 + end subroutine d_cuda_mlt_a_2 - subroutine d_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine d_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none real(psb_dpk_), intent(in) :: alpha,beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_gpu), intent(inout) :: z + class(psb_d_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type (yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= dzero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine d_gpu_mlt_v_2 + end subroutine d_cuda_mlt_v_2 - subroutine d_gpu_scal(alpha, x) + subroutine d_cuda_scal(alpha, x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x real(psb_dpk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine d_gpu_scal + end subroutine d_cuda_scal - function d_gpu_nrm2(n,x) result(res) + function d_cuda_nrm2(n,x) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDevice(res,n,x%deviceVect) - end function d_gpu_nrm2 + end function d_cuda_nrm2 - function d_gpu_amax(n,x) result(res) + function d_cuda_amax(n,x) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDevice(res,n,x%deviceVect) - end function d_gpu_amax + end function d_cuda_amax - function d_gpu_asum(n,x) result(res) + function d_cuda_asum(n,x) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDevice(res,n,x%deviceVect) - end function d_gpu_asum + end function d_cuda_asum - subroutine d_gpu_absval1(x) + subroutine d_cuda_absval1(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,done,x%deviceVect) - end subroutine d_gpu_absval1 + end subroutine d_cuda_absval1 - subroutine d_gpu_absval2(x,y) + subroutine d_cuda_absval2(x,y) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_d_vect_gpu) + class is (psb_d_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,done,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_d_base_vect_type%absval(y) end select - end subroutine d_gpu_absval2 + end subroutine d_cuda_absval2 - subroutine d_gpu_vect_finalize(x) + subroutine d_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_d_vect_gpu), intent(inout) :: x + type(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine d_gpu_vect_finalize + end subroutine d_cuda_vect_finalize - subroutine d_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine d_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_d_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_d_vect_gpu) + class is (psb_d_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceDouble(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine d_gpu_ins_v + end subroutine d_cuda_ins_v - subroutine d_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine d_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_dpk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_d_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine d_gpu_ins_a + end subroutine d_cuda_ins_a #endif -end module psb_d_gpu_vect_mod +end module psb_d_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_d_gpu_vect_mod -module psb_d_gpu_multivect_mod +module psb_d_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_d_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_d_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_d_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_base_multivect_type) :: psb_d_multivect_gpu + type, extends(psb_d_base_multivect_type) :: psb_d_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_d_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => d_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => d_gpu_multi_get_ncols - procedure, nopass :: get_fmt => d_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => d_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => d_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => d_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => d_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => d_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => d_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => d_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => d_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => d_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => d_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => d_gpu_multi_amax -!!$ procedure, pass(x) :: asum => d_gpu_multi_asum - procedure, pass(x) :: all => d_gpu_multi_all - procedure, pass(x) :: zero => d_gpu_multi_zero - procedure, pass(x) :: asb => d_gpu_multi_asb - procedure, pass(x) :: sync => d_gpu_multi_sync - procedure, pass(x) :: sync_space => d_gpu_multi_sync_space - procedure, pass(x) :: bld_x => d_gpu_multi_bld_x - procedure, pass(x) :: bld_n => d_gpu_multi_bld_n - procedure, pass(x) :: free => d_gpu_multi_free - procedure, pass(x) :: ins => d_gpu_multi_ins - procedure, pass(x) :: is_host => d_gpu_multi_is_host - procedure, pass(x) :: is_dev => d_gpu_multi_is_dev - procedure, pass(x) :: is_sync => d_gpu_multi_is_sync - procedure, pass(x) :: set_host => d_gpu_multi_set_host - procedure, pass(x) :: set_dev => d_gpu_multi_set_dev - procedure, pass(x) :: set_sync => d_gpu_multi_set_sync - procedure, pass(x) :: set_scal => d_gpu_multi_set_scal - procedure, pass(x) :: set_vect => d_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => d_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => d_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => d_gpu_multi_sctb_x - final :: d_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => d_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => d_cuda_multi_get_ncols + procedure, nopass :: get_fmt => d_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => d_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => d_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => d_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => d_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => d_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => d_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => d_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => d_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => d_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => d_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => d_cuda_multi_amax +!!$ procedure, pass(x) :: asum => d_cuda_multi_asum + procedure, pass(x) :: all => d_cuda_multi_all + procedure, pass(x) :: zero => d_cuda_multi_zero + procedure, pass(x) :: asb => d_cuda_multi_asb + procedure, pass(x) :: sync => d_cuda_multi_sync + procedure, pass(x) :: sync_space => d_cuda_multi_sync_space + procedure, pass(x) :: bld_x => d_cuda_multi_bld_x + procedure, pass(x) :: bld_n => d_cuda_multi_bld_n + procedure, pass(x) :: free => d_cuda_multi_free + procedure, pass(x) :: ins => d_cuda_multi_ins + procedure, pass(x) :: is_host => d_cuda_multi_is_host + procedure, pass(x) :: is_dev => d_cuda_multi_is_dev + procedure, pass(x) :: is_sync => d_cuda_multi_is_sync + procedure, pass(x) :: set_host => d_cuda_multi_set_host + procedure, pass(x) :: set_dev => d_cuda_multi_set_dev + procedure, pass(x) :: set_sync => d_cuda_multi_set_sync + procedure, pass(x) :: set_scal => d_cuda_multi_set_scal + procedure, pass(x) :: set_vect => d_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => d_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => d_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => d_cuda_multi_sctb_x + final :: d_cuda_multi_vect_finalize #endif - end type psb_d_multivect_gpu + end type psb_d_multivect_cuda - public :: psb_d_multivect_gpu + public :: psb_d_multivect_cuda private :: constructor - interface psb_d_multivect_gpu + interface psb_d_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) real(psb_dpk_) :: x(:,:) - type(psb_d_multivect_gpu) :: this + type(psb_d_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine d_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_dpk_) :: y(:) -!!$ class(psb_d_multivect_gpu) :: x +!!$ class(psb_d_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine d_gpu_multi_gthzv_x +!!$ end subroutine d_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine d_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine d_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_multivect_gpu) :: y +!!$ class(psb_d_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_d_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine d_gpu_multi_sctb +!!$ end subroutine d_cuda_multi_sctb !!$ -!!$ subroutine d_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine d_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_multivect_gpu) :: y +!!$ class(psb_d_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine d_gpu_multi_sctb_x +!!$ end subroutine d_cuda_multi_sctb_x - subroutine d_gpu_multi_bld_x(x,this) + subroutine d_cuda_multi_bld_x(x,this) use psb_base_mod real(psb_dpk_), intent(in) :: this(:,:) - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_multi_bld_x',& + call psb_errpush(info,'d_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine d_gpu_multi_bld_x + end subroutine d_cuda_multi_bld_x - subroutine d_gpu_multi_bld_n(x,m,n) + subroutine d_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'d_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'d_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine d_gpu_multi_bld_n + end subroutine d_cuda_multi_bld_n - subroutine d_gpu_multi_set_host(x) + subroutine d_cuda_multi_set_host(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine d_gpu_multi_set_host + end subroutine d_cuda_multi_set_host - subroutine d_gpu_multi_set_dev(x) + subroutine d_cuda_multi_set_dev(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine d_gpu_multi_set_dev + end subroutine d_cuda_multi_set_dev - subroutine d_gpu_multi_set_sync(x) + subroutine d_cuda_multi_set_sync(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine d_gpu_multi_set_sync + end subroutine d_cuda_multi_set_sync - function d_gpu_multi_is_dev(x) result(res) + function d_cuda_multi_is_dev(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function d_gpu_multi_is_dev + end function d_cuda_multi_is_dev - function d_gpu_multi_is_host(x) result(res) + function d_cuda_multi_is_host(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function d_gpu_multi_is_host + end function d_cuda_multi_is_host - function d_gpu_multi_is_sync(x) result(res) + function d_cuda_multi_is_sync(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function d_gpu_multi_is_sync + end function d_cuda_multi_is_sync - function d_gpu_multi_get_nrows(x) result(res) + function d_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function d_gpu_multi_get_nrows + end function d_cuda_multi_get_nrows - function d_gpu_multi_get_ncols(x) result(res) + function d_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function d_gpu_multi_get_ncols + end function d_cuda_multi_get_ncols - function d_gpu_multi_get_fmt() result(res) + function d_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'dGPU' - end function d_gpu_multi_get_fmt + end function d_cuda_multi_get_fmt -!!$ function d_gpu_multi_dot_v(n,x,y) result(res) +!!$ function d_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ class(psb_d_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_d_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'d_gpu_multi_dot_v') +!!$ call psb_errpush(info,'d_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function d_gpu_multi_dot_v +!!$ end function d_cuda_multi_dot_v !!$ -!!$ function d_gpu_multi_dot_a(n,x,y) result(res) +!!$ function d_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ real(psb_dpk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function d_gpu_multi_dot_a +!!$ end function d_cuda_multi_dot_a !!$ -!!$ subroutine d_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine d_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_d_base_multivect_type), intent(inout) :: x -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ real(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine d_gpu_multi_axpby_v +!!$ end subroutine d_cuda_multi_axpby_v !!$ -!!$ subroutine d_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine d_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ real(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine d_gpu_multi_axpby_a +!!$ end subroutine d_cuda_multi_axpby_a !!$ -!!$ subroutine d_gpu_multi_mlt_v(x, y, info) +!!$ subroutine d_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_d_base_multivect_type), intent(inout) :: x -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine d_gpu_multi_mlt_v +!!$ end subroutine d_cuda_multi_mlt_v !!$ -!!$ subroutine d_gpu_multi_mlt_a(x, y, info) +!!$ subroutine d_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_d_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine d_gpu_multi_mlt_a +!!$ end subroutine d_cuda_multi_mlt_a !!$ -!!$ subroutine d_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine d_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta !!$ real(psb_dpk_), intent(in) :: x(:) !!$ real(psb_dpk_), intent(in) :: y(:) -!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ class(psb_d_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine d_gpu_multi_mlt_a_2 +!!$ end subroutine d_cuda_multi_mlt_a_2 !!$ -!!$ subroutine d_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine d_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta !!$ class(psb_d_base_multivect_type), intent(inout) :: x !!$ class(psb_d_base_multivect_type), intent(inout) :: y -!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ class(psb_d_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine d_gpu_multi_mlt_v_2 +!!$ end subroutine d_cuda_multi_mlt_v_2 - subroutine d_gpu_multi_set_scal(x,val) - class(psb_d_multivect_gpu), intent(inout) :: x + subroutine d_cuda_multi_set_scal(x,val) + class(psb_d_multivect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_d_base_multivect_type%set_scal(val) call x%set_host() - end subroutine d_gpu_multi_set_scal + end subroutine d_cuda_multi_set_scal - subroutine d_gpu_multi_set_vect(x,val) - class(psb_d_multivect_gpu), intent(inout) :: x + subroutine d_cuda_multi_set_vect(x,val) + class(psb_d_multivect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_d_base_multivect_type%set_vect(val) call x%set_host() - end subroutine d_gpu_multi_set_vect + end subroutine d_cuda_multi_set_vect -!!$ subroutine d_gpu_multi_scal(alpha, x) +!!$ subroutine d_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ real(psb_dpk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_d_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine d_gpu_multi_scal +!!$ end subroutine d_cuda_multi_scal !!$ !!$ -!!$ function d_gpu_multi_nrm2(n,x) result(res) +!!$ function d_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function d_gpu_multi_nrm2 +!!$ end function d_cuda_multi_nrm2 !!$ -!!$ function d_gpu_multi_amax(n,x) result(res) +!!$ function d_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function d_gpu_multi_amax +!!$ end function d_cuda_multi_amax !!$ -!!$ function d_gpu_multi_asum(n,x) result(res) +!!$ function d_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function d_gpu_multi_asum +!!$ end function d_cuda_multi_asum - subroutine d_gpu_multi_all(m,n, x, info) + subroutine d_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_multivect_gpu), intent(out) :: x + class(psb_d_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=dzero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_multi_all',& + call psb_errpush(info,'d_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine d_gpu_multi_all + end subroutine d_cuda_multi_all - subroutine d_gpu_multi_zero(x) + subroutine d_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine d_gpu_multi_zero + end subroutine d_cuda_multi_zero - subroutine d_gpu_multi_asb(m,n, x, info) + subroutine d_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine d_gpu_multi_asb + end subroutine d_cuda_multi_asb - subroutine d_gpu_multi_sync_space(x,info) + subroutine d_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine d_gpu_multi_sync_space + end subroutine d_cuda_multi_sync_space - subroutine d_gpu_multi_sync(x) + subroutine d_cuda_multi_sync(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'d_gpu_multi_sync') + call psb_errpush(info,'d_cuda_multi_sync') end if - end subroutine d_gpu_multi_sync + end subroutine d_cuda_multi_sync - subroutine d_gpu_multi_free(x, info) + subroutine d_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine d_gpu_multi_free + end subroutine d_cuda_multi_free - subroutine d_gpu_multi_vect_finalize(x) + subroutine d_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_d_multivect_gpu), intent(inout) :: x + type(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine d_gpu_multi_vect_finalize + end subroutine d_cuda_multi_vect_finalize - subroutine d_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine d_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_dpk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_d_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine d_gpu_multi_ins + end subroutine d_cuda_multi_ins #endif -end module psb_d_gpu_multivect_mod +end module psb_d_cuda_multivect_mod diff --git a/cuda/psb_i_csrg_mat_mod.F90 b/cuda/psb_i_csrg_mat_mod.F90 deleted file mode 100644 index 9a4a3852..00000000 --- a/cuda/psb_i_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_i_csrg_mat_mod - - use iso_c_binding - use psb_i_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_i_csr_sparse_mat) :: psb_i_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(i_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => i_csrg_get_fmt - procedure, pass(a) :: sizeof => i_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_i_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_i_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_i_csrg_csmm - procedure, pass(a) :: csmv => psb_i_csrg_csmv - procedure, pass(a) :: scals => psb_i_csrg_scals - procedure, pass(a) :: scalv => psb_i_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_i_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_csrg_from_fmt - procedure, pass(a) :: free => i_csrg_free - procedure, pass(a) :: mold => psb_i_csrg_mold - procedure, pass(a) :: is_host => i_csrg_is_host - procedure, pass(a) :: is_dev => i_csrg_is_dev - procedure, pass(a) :: is_sync => i_csrg_is_sync - procedure, pass(a) :: set_host => i_csrg_set_host - procedure, pass(a) :: set_dev => i_csrg_set_dev - procedure, pass(a) :: set_sync => i_csrg_set_sync - procedure, pass(a) :: sync => i_csrg_sync - procedure, pass(a) :: to_gpu => psb_i_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_i_csrg_from_gpu - final :: i_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_csrg_mold -#endif - end type psb_i_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: i_csrg_get_nzeros, i_csrg_free, i_csrg_get_fmt, & - & i_csrg_get_size, i_csrg_sizeof, i_csrg_get_nz_row - - - interface - subroutine psb_i_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta - class(psb_i_base_vect_type), intent(inout) :: x - class(psb_i_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_i_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta - class(psb_i_base_vect_type), intent(inout) :: x - class(psb_i_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_vect_mv - end interface - - interface - subroutine psb_i_csrg_reallocate_nz(nz,a) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_i_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_i_csrg_reallocate_nz - end interface - - interface - subroutine psb_i_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_csrg_allocate_mnnz - end interface - - interface - subroutine psb_i_csrg_mold(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_mold - end interface - - interface - subroutine psb_i_csrg_to_gpu(a,info, nzrm) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_csrg_to_gpu - end interface - - interface - subroutine psb_i_csrg_from_gpu(a,info) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_from_gpu - end interface - - interface - subroutine psb_i_cp_csrg_from_coo(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_csrg_from_coo - end interface - - interface - subroutine psb_i_cp_csrg_from_fmt(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_csrg_from_fmt - end interface - - interface - subroutine psb_i_mv_csrg_from_coo(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_csrg_from_coo - end interface - - interface - subroutine psb_i_mv_csrg_from_fmt(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_csrg_from_fmt - end interface - - interface - subroutine psb_i_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta, x(:) - integer(psb_ipk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_csmv - end interface - interface - subroutine psb_i_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) - integer(psb_ipk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_csmm - end interface - - interface - subroutine psb_i_csrg_scal(d,a,info,side) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_i_csrg_scal - end interface - - interface - subroutine psb_i_csrg_scals(d,a,info) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function i_csrg_sizeof(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + psb_sizeof_ip * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function i_csrg_sizeof - - function i_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function i_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine i_csrg_set_host(a) - implicit none - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine i_csrg_set_host - - subroutine i_csrg_set_dev(a) - implicit none - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine i_csrg_set_dev - - subroutine i_csrg_set_sync(a) - implicit none - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine i_csrg_set_sync - - function i_csrg_is_dev(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function i_csrg_is_dev - - function i_csrg_is_host(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function i_csrg_is_host - - function i_csrg_is_sync(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function i_csrg_is_sync - - - subroutine i_csrg_sync(a) - implicit none - class(psb_i_csrg_sparse_mat), target, intent(in) :: a - class(psb_i_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine i_csrg_sync - - subroutine i_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_i_csr_sparse_mat%free() - - return - - end subroutine i_csrg_free - - subroutine i_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_i_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine i_csrg_finalize - -#else - interface - subroutine psb_i_csrg_mold(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_mold - end interface - -#endif - -end module psb_i_csrg_mat_mod diff --git a/cuda/psb_i_cuda_csrg_mat_mod.F90 b/cuda/psb_i_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..de0eac09 --- /dev/null +++ b/cuda/psb_i_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_cuda_csrg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_csr_sparse_mat) :: psb_i_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(i_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_i_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_i_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => i_cuda_csrg_free + procedure, pass(a) :: mold => psb_i_cuda_csrg_mold + procedure, pass(a) :: is_host => i_cuda_csrg_is_host + procedure, pass(a) :: is_dev => i_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => i_cuda_csrg_is_sync + procedure, pass(a) :: set_host => i_cuda_csrg_set_host + procedure, pass(a) :: set_dev => i_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => i_cuda_csrg_set_sync + procedure, pass(a) :: sync => i_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_i_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_i_cuda_csrg_from_gpu + final :: i_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_cuda_csrg_mold +#endif + end type psb_i_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_cuda_csrg_get_nzeros, i_cuda_csrg_free, i_cuda_csrg_get_fmt, & + & i_cuda_csrg_get_size, i_cuda_csrg_sizeof, i_cuda_csrg_get_nz_row + + + interface + subroutine psb_i_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_i_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_i_cuda_csrg_reallocate_nz(nz,a) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_i_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_i_cuda_csrg_mold(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_mold + end interface + + interface + subroutine psb_i_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_i_cuda_csrg_from_gpu(a,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_csmv + end interface + interface + subroutine psb_i_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_csmm + end interface + + interface + subroutine psb_i_cuda_csrg_scal(d,a,info,side) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_cuda_csrg_scal + end interface + + interface + subroutine psb_i_cuda_csrg_scals(d,a,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_ip * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_cuda_csrg_sizeof + + function i_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function i_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine i_cuda_csrg_set_host(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_cuda_csrg_set_host + + subroutine i_cuda_csrg_set_dev(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_cuda_csrg_set_dev + + subroutine i_cuda_csrg_set_sync(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_cuda_csrg_set_sync + + function i_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_cuda_csrg_is_dev + + function i_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_cuda_csrg_is_host + + function i_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_cuda_csrg_is_sync + + + subroutine i_cuda_csrg_sync(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_cuda_csrg_sync + + subroutine i_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_i_csr_sparse_mat%free() + + return + + end subroutine i_cuda_csrg_free + + subroutine i_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine i_cuda_csrg_finalize + +#else + interface + subroutine psb_i_cuda_csrg_mold(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_mold + end interface + +#endif + +end module psb_i_cuda_csrg_mat_mod diff --git a/cuda/psb_i_diag_mat_mod.F90 b/cuda/psb_i_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_i_diag_mat_mod.F90 rename to cuda/psb_i_cuda_diag_mat_mod.F90 index b54ee8d5..94a3cc3e 100644 --- a/cuda/psb_i_diag_mat_mod.F90 +++ b/cuda/psb_i_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_i_diag_mat_mod +module psb_i_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_i_dia_mat_mod - type, extends(psb_i_dia_sparse_mat) :: psb_i_diag_sparse_mat + type, extends(psb_i_dia_sparse_mat) :: psb_i_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_i_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => i_diag_get_fmt - procedure, pass(a) :: sizeof => i_diag_sizeof - procedure, pass(a) :: vect_mv => psb_i_diag_vect_mv -! procedure, pass(a) :: csmm => psb_i_diag_csmm - procedure, pass(a) :: csmv => psb_i_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_i_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_i_diag_scals -! procedure, pass(a) :: scalv => psb_i_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_i_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_i_diag_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => i_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_i_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_i_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_i_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_i_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_i_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_i_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_i_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_i_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_i_mv_diag_from_fmt - procedure, pass(a) :: free => i_diag_free - procedure, pass(a) :: mold => psb_i_diag_mold - procedure, pass(a) :: to_gpu => psb_i_diag_to_gpu - final :: i_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => i_cuda_diag_free + procedure, pass(a) :: mold => psb_i_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_diag_to_gpu + final :: i_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_i_diag_mold + procedure, pass(a) :: mold => psb_i_cuda_diag_mold #endif - end type psb_i_diag_sparse_mat + end type psb_i_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: i_diag_get_nzeros, i_diag_free, i_diag_get_fmt, & - & i_diag_get_size, i_diag_sizeof, i_diag_get_nz_row + private :: i_cuda_diag_get_nzeros, i_cuda_diag_free, i_cuda_diag_get_fmt, & + & i_cuda_diag_get_size, i_cuda_diag_sizeof, i_cuda_diag_get_nz_row interface - subroutine psb_i_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_vect_mv + end subroutine psb_i_cuda_diag_vect_mv end interface interface - subroutine psb_i_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_inner_vect_sv + end subroutine psb_i_cuda_diag_inner_vect_sv end interface interface - subroutine psb_i_diag_reallocate_nz(nz,a) - import :: psb_i_diag_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_diag_reallocate_nz(nz,a) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_diag_sparse_mat), intent(inout) :: a - end subroutine psb_i_diag_reallocate_nz + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_diag_reallocate_nz end interface interface - subroutine psb_i_diag_allocate_mnnz(m,n,a,nz) - import :: psb_i_diag_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_diag_allocate_mnnz + end subroutine psb_i_cuda_diag_allocate_mnnz end interface interface - subroutine psb_i_diag_mold(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_mold(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_diag_mold + end subroutine psb_i_cuda_diag_mold end interface interface - subroutine psb_i_diag_to_gpu(a,info, nzrm) - import :: psb_i_diag_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_diag_to_gpu + end subroutine psb_i_cuda_diag_to_gpu end interface interface - subroutine psb_i_cp_diag_from_coo(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_diag_from_coo(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_diag_from_coo + end subroutine psb_i_cuda_cp_diag_from_coo end interface interface - subroutine psb_i_cp_diag_from_fmt(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_diag_from_fmt + end subroutine psb_i_cuda_cp_diag_from_fmt end interface interface - subroutine psb_i_mv_diag_from_coo(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_diag_from_coo(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_diag_from_coo + end subroutine psb_i_cuda_mv_diag_from_coo end interface interface - subroutine psb_i_mv_diag_from_fmt(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_diag_from_fmt + end subroutine psb_i_cuda_mv_diag_from_fmt end interface interface - subroutine psb_i_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_csmv + end subroutine psb_i_cuda_diag_csmv end interface interface - subroutine psb_i_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_csmm + end subroutine psb_i_cuda_diag_csmm end interface interface - subroutine psb_i_diag_scal(d,a,info, side) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_diag_scal(d,a,info, side) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_diag_scal + end subroutine psb_i_cuda_diag_scal end interface interface - subroutine psb_i_diag_scals(d,a,info) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_diag_scals(d,a,info) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_diag_scals + end subroutine psb_i_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function i_diag_sizeof(a) result(res) + function i_cuda_diag_sizeof(a) result(res) implicit none - class(psb_i_diag_sparse_mat), intent(in) :: a + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_diag_sizeof + end function i_cuda_diag_sizeof - function i_diag_get_fmt() result(res) + function i_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function i_diag_get_fmt + end function i_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine i_diag_free(a) + subroutine i_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine i_diag_free + end subroutine i_cuda_diag_free - subroutine i_diag_finalize(a) + subroutine i_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_i_diag_sparse_mat), intent(inout) :: a + type(psb_i_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_diag_finalize + end subroutine i_cuda_diag_finalize #else interface - subroutine psb_i_diag_mold(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_mold(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_diag_mold + end subroutine psb_i_cuda_diag_mold end interface #endif -end module psb_i_diag_mat_mod +end module psb_i_cuda_diag_mat_mod diff --git a/cuda/psb_i_dnsg_mat_mod.F90 b/cuda/psb_i_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_i_dnsg_mat_mod.F90 rename to cuda/psb_i_cuda_dnsg_mat_mod.F90 index 978996ae..f357977e 100644 --- a/cuda/psb_i_dnsg_mat_mod.F90 +++ b/cuda/psb_i_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_i_dnsg_mat_mod +module psb_i_cuda_dnsg_mat_mod use iso_c_binding use psb_i_mat_mod use psb_i_dns_mat_mod use dnsdev_mod - type, extends(psb_i_dns_sparse_mat) :: psb_i_dnsg_sparse_mat + type, extends(psb_i_dns_sparse_mat) :: psb_i_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_i_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => i_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => i_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_i_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_i_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_i_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_i_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_i_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_i_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_i_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_i_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => i_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_i_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_i_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_i_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_i_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_i_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_i_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_i_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_i_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_dnsg_from_fmt - procedure, pass(a) :: free => i_dnsg_free - procedure, pass(a) :: mold => psb_i_dnsg_mold - procedure, pass(a) :: to_gpu => psb_i_dnsg_to_gpu - final :: i_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => i_cuda_dnsg_free + procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_dnsg_to_gpu + final :: i_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_i_dnsg_mold + procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold #endif - end type psb_i_dnsg_sparse_mat + end type psb_i_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: i_dnsg_get_nzeros, i_dnsg_free, i_dnsg_get_fmt, & - & i_dnsg_get_size, i_dnsg_get_nz_row + private :: i_cuda_dnsg_get_nzeros, i_cuda_dnsg_free, i_cuda_dnsg_get_fmt, & + & i_cuda_dnsg_get_size, i_cuda_dnsg_get_nz_row interface - subroutine psb_i_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_dnsg_vect_mv + end subroutine psb_i_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type -!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta !!$ class(psb_i_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_dnsg_inner_vect_sv +!!$ end subroutine psb_i_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_i_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_i_dnsg_reallocate_nz +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_i_dnsg_allocate_mnnz +!!$ end subroutine psb_i_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_i_dnsg_mold(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_dnsg_mold(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_dnsg_mold + end subroutine psb_i_cuda_dnsg_mold end interface interface - subroutine psb_i_dnsg_to_gpu(a,info) - import :: psb_i_dnsg_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_dnsg_to_gpu(a,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_dnsg_to_gpu + end subroutine psb_i_cuda_dnsg_to_gpu end interface interface - subroutine psb_i_cp_dnsg_from_coo(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_dnsg_from_coo + end subroutine psb_i_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_i_cp_dnsg_from_fmt(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_dnsg_from_fmt + end subroutine psb_i_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_i_mv_dnsg_from_coo(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_dnsg_from_coo + end subroutine psb_i_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_i_mv_dnsg_from_fmt(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_dnsg_from_fmt + end subroutine psb_i_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_i_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:) !!$ integer(psb_ipk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_dnsg_csmv +!!$ end subroutine psb_i_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_i_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) !!$ integer(psb_ipk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_dnsg_csmm +!!$ end subroutine psb_i_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_scal(d,a,info, side) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_i_dnsg_scal +!!$ end subroutine psb_i_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_scals(d,a,info) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_dnsg_scals +!!$ end subroutine psb_i_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function i_dnsg_get_fmt() result(res) + function i_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function i_dnsg_get_fmt + end function i_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine i_dnsg_free(a) + subroutine i_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine i_dnsg_free + end subroutine i_cuda_dnsg_free - subroutine i_dnsg_finalize(a) + subroutine i_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_i_dnsg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_dnsg_finalize + end subroutine i_cuda_dnsg_finalize #else interface - subroutine psb_i_dnsg_mold(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_dnsg_mold(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_dnsg_mold + end subroutine psb_i_cuda_dnsg_mold end interface #endif -end module psb_i_dnsg_mat_mod +end module psb_i_cuda_dnsg_mat_mod diff --git a/cuda/psb_i_elg_mat_mod.F90 b/cuda/psb_i_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_i_elg_mat_mod.F90 rename to cuda/psb_i_cuda_elg_mat_mod.F90 index a421e611..aa3e2d4d 100644 --- a/cuda/psb_i_elg_mat_mod.F90 +++ b/cuda/psb_i_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_i_elg_mat_mod +module psb_i_cuda_elg_mat_mod use iso_c_binding use psb_i_mat_mod use psb_i_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_ell_sparse_mat) :: psb_i_elg_sparse_mat + type, extends(psb_i_ell_sparse_mat) :: psb_i_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_i_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => i_elg_get_fmt - procedure, pass(a) :: sizeof => i_elg_sizeof - procedure, pass(a) :: vect_mv => psb_i_elg_vect_mv - procedure, pass(a) :: csmm => psb_i_elg_csmm - procedure, pass(a) :: csmv => psb_i_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_i_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_i_elg_scals - procedure, pass(a) :: scalv => psb_i_elg_scal - procedure, pass(a) :: reallocate_nz => psb_i_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_elg_allocate_mnnz - procedure, pass(a) :: reinit => i_elg_reinit + procedure, nopass :: get_fmt => i_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_i_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_cuda_elg_scals + procedure, pass(a) :: scalv => psb_i_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => i_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_elg_from_fmt - procedure, pass(a) :: free => i_elg_free - procedure, pass(a) :: mold => psb_i_elg_mold - procedure, pass(a) :: csput_a => psb_i_elg_csput_a - procedure, pass(a) :: csput_v => psb_i_elg_csput_v - procedure, pass(a) :: is_host => i_elg_is_host - procedure, pass(a) :: is_dev => i_elg_is_dev - procedure, pass(a) :: is_sync => i_elg_is_sync - procedure, pass(a) :: set_host => i_elg_set_host - procedure, pass(a) :: set_dev => i_elg_set_dev - procedure, pass(a) :: set_sync => i_elg_set_sync - procedure, pass(a) :: sync => i_elg_sync - procedure, pass(a) :: from_gpu => psb_i_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_i_elg_to_gpu - procedure, pass(a) :: asb => psb_i_elg_asb - final :: i_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => i_cuda_elg_free + procedure, pass(a) :: mold => psb_i_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_i_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_i_cuda_elg_csput_v + procedure, pass(a) :: is_host => i_cuda_elg_is_host + procedure, pass(a) :: is_dev => i_cuda_elg_is_dev + procedure, pass(a) :: is_sync => i_cuda_elg_is_sync + procedure, pass(a) :: set_host => i_cuda_elg_set_host + procedure, pass(a) :: set_dev => i_cuda_elg_set_dev + procedure, pass(a) :: set_sync => i_cuda_elg_set_sync + procedure, pass(a) :: sync => i_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_i_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_i_cuda_elg_asb + final :: i_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_i_elg_mold - procedure, pass(a) :: asb => psb_i_elg_asb + procedure, pass(a) :: mold => psb_i_cuda_elg_mold + procedure, pass(a) :: asb => psb_i_cuda_elg_asb #endif - end type psb_i_elg_sparse_mat + end type psb_i_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: i_elg_get_nzeros, i_elg_free, i_elg_get_fmt, & - & i_elg_get_size, i_elg_sizeof, i_elg_get_nz_row, i_elg_sync + private :: i_cuda_elg_get_nzeros, i_cuda_elg_free, i_cuda_elg_get_fmt, & + & i_cuda_elg_get_size, i_cuda_elg_sizeof, i_cuda_elg_get_nz_row, i_cuda_elg_sync interface - subroutine psb_i_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_vect_mv + end subroutine psb_i_cuda_elg_vect_mv end interface interface - subroutine psb_i_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_inner_vect_sv + end subroutine psb_i_cuda_elg_inner_vect_sv end interface interface - subroutine psb_i_elg_reallocate_nz(nz,a) - import :: psb_i_elg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_elg_reallocate_nz(nz,a) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_elg_reallocate_nz + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_reallocate_nz end interface interface - subroutine psb_i_elg_allocate_mnnz(m,n,a,nz) - import :: psb_i_elg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_elg_allocate_mnnz + end subroutine psb_i_cuda_elg_allocate_mnnz end interface interface - subroutine psb_i_elg_mold(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_mold(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_mold + end subroutine psb_i_cuda_elg_mold end interface interface - subroutine psb_i_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_csput_a + end subroutine psb_i_cuda_elg_csput_a end interface interface - subroutine psb_i_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_i_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_i_base_vect_type,& + subroutine psb_i_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_i_base_vect_type,& & psb_i_base_vect_type - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_csput_v + end subroutine psb_i_cuda_elg_csput_v end interface interface - subroutine psb_i_elg_from_gpu(a,info) - import :: psb_i_elg_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_from_gpu(a,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_from_gpu + end subroutine psb_i_cuda_elg_from_gpu end interface interface - subroutine psb_i_elg_to_gpu(a,info, nzrm) - import :: psb_i_elg_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_elg_to_gpu + end subroutine psb_i_cuda_elg_to_gpu end interface interface - subroutine psb_i_cp_elg_from_coo(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_elg_from_coo(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_elg_from_coo + end subroutine psb_i_cuda_cp_elg_from_coo end interface interface - subroutine psb_i_cp_elg_from_fmt(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_elg_from_fmt + end subroutine psb_i_cuda_cp_elg_from_fmt end interface interface - subroutine psb_i_mv_elg_from_coo(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_elg_from_coo(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_elg_from_coo + end subroutine psb_i_cuda_mv_elg_from_coo end interface interface - subroutine psb_i_mv_elg_from_fmt(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_elg_from_fmt + end subroutine psb_i_cuda_mv_elg_from_fmt end interface interface - subroutine psb_i_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_csmv + end subroutine psb_i_cuda_elg_csmv end interface interface - subroutine psb_i_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_csmm + end subroutine psb_i_cuda_elg_csmm end interface interface - subroutine psb_i_elg_scal(d,a,info, side) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_scal(d,a,info, side) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_elg_scal + end subroutine psb_i_cuda_elg_scal end interface interface - subroutine psb_i_elg_scals(d,a,info) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_scals(d,a,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_scals + end subroutine psb_i_cuda_elg_scals end interface interface - subroutine psb_i_elg_asb(a) - import :: psb_i_elg_sparse_mat - class(psb_i_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_elg_asb + subroutine psb_i_cuda_elg_asb(a) + import :: psb_i_cuda_elg_sparse_mat + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function i_elg_sizeof(a) result(res) + function i_cuda_elg_sizeof(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_elg_sizeof + end function i_cuda_elg_sizeof - function i_elg_get_fmt() result(res) + function i_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function i_elg_get_fmt + end function i_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine i_elg_reinit(a,clear) + subroutine i_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine i_elg_reinit + end subroutine i_cuda_elg_reinit - subroutine i_elg_free(a) + subroutine i_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine i_elg_free + end subroutine i_cuda_elg_free - subroutine i_elg_sync(a) + subroutine i_cuda_elg_sync(a) implicit none - class(psb_i_elg_sparse_mat), target, intent(in) :: a - class(psb_i_elg_sparse_mat), pointer :: tmpa + class(psb_i_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine i_elg_sync + end subroutine i_cuda_elg_sync - subroutine i_elg_set_host(a) + subroutine i_cuda_elg_set_host(a) implicit none - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine i_elg_set_host + end subroutine i_cuda_elg_set_host - subroutine i_elg_set_dev(a) + subroutine i_cuda_elg_set_dev(a) implicit none - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine i_elg_set_dev + end subroutine i_cuda_elg_set_dev - subroutine i_elg_set_sync(a) + subroutine i_cuda_elg_set_sync(a) implicit none - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine i_elg_set_sync + end subroutine i_cuda_elg_set_sync - function i_elg_is_dev(a) result(res) + function i_cuda_elg_is_dev(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function i_elg_is_dev + end function i_cuda_elg_is_dev - function i_elg_is_host(a) result(res) + function i_cuda_elg_is_host(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function i_elg_is_host + end function i_cuda_elg_is_host - function i_elg_is_sync(a) result(res) + function i_cuda_elg_is_sync(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function i_elg_is_sync + end function i_cuda_elg_is_sync - subroutine i_elg_finalize(a) + subroutine i_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_i_elg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_elg_finalize + end subroutine i_cuda_elg_finalize #else interface - subroutine psb_i_elg_asb(a) - import :: psb_i_elg_sparse_mat - class(psb_i_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_elg_asb + subroutine psb_i_cuda_elg_asb(a) + import :: psb_i_cuda_elg_sparse_mat + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_asb end interface interface - subroutine psb_i_elg_mold(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_mold(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_mold + end subroutine psb_i_cuda_elg_mold end interface #endif -end module psb_i_elg_mat_mod +end module psb_i_cuda_elg_mat_mod diff --git a/cuda/psb_i_hdiag_mat_mod.F90 b/cuda/psb_i_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_i_hdiag_mat_mod.F90 rename to cuda/psb_i_cuda_hdiag_mat_mod.F90 index a42030b8..03ff573b 100644 --- a/cuda/psb_i_hdiag_mat_mod.F90 +++ b/cuda/psb_i_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_i_hdiag_mat_mod +module psb_i_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_i_hdia_mat_mod - type, extends(psb_i_hdia_sparse_mat) :: psb_i_hdiag_sparse_mat + type, extends(psb_i_hdia_sparse_mat) :: psb_i_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => i_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => i_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_i_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_i_hdiag_csmm - procedure, pass(a) :: csmv => psb_i_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_i_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_i_hdiag_scals - ! procedure, pass(a) :: scalv => psb_i_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_i_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_i_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => i_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_i_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_i_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_i_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_i_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_i_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_i_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_i_mv_hdiag_from_fmt - procedure, pass(a) :: free => i_hdiag_free - procedure, pass(a) :: mold => psb_i_hdiag_mold - procedure, pass(a) :: to_gpu => psb_i_hdiag_to_gpu - final :: i_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => i_cuda_hdiag_free + procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_hdiag_to_gpu + final :: i_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_i_hdiag_mold + procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold #endif - end type psb_i_hdiag_sparse_mat + end type psb_i_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: i_hdiag_get_nzeros, i_hdiag_free, i_hdiag_get_fmt, & - & i_hdiag_get_size, i_hdiag_sizeof, i_hdiag_get_nz_row + private :: i_cuda_hdiag_get_nzeros, i_cuda_hdiag_free, i_cuda_hdiag_get_fmt, & + & i_cuda_hdiag_get_size, i_cuda_hdiag_sizeof, i_cuda_hdiag_get_nz_row interface - subroutine psb_i_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hdiag_vect_mv + end subroutine psb_i_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_i_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type -!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta !!$ class(psb_i_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_hdiag_inner_vect_sv +!!$ end subroutine psb_i_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_i_hdiag_reallocate_nz +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_i_hdiag_allocate_mnnz +!!$ end subroutine psb_i_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_i_hdiag_mold(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_mold(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hdiag_mold + end subroutine psb_i_cuda_hdiag_mold end interface interface - subroutine psb_i_hdiag_to_gpu(a,info) - import :: psb_i_hdiag_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hdiag_to_gpu(a,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hdiag_to_gpu + end subroutine psb_i_cuda_hdiag_to_gpu end interface interface - subroutine psb_i_cp_hdiag_from_coo(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hdiag_from_coo + end subroutine psb_i_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_i_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_i_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_cp_hdiag_from_fmt +!!$ end subroutine psb_i_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_i_mv_hdiag_from_coo(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hdiag_from_coo + end subroutine psb_i_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_i_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_i_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_mv_hdiag_from_fmt +!!$ end subroutine psb_i_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_i_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hdiag_csmv + end subroutine psb_i_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_i_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) !!$ integer(psb_ipk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_hdiag_csmm +!!$ end subroutine psb_i_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_scal(d,a,info, side) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_i_hdiag_scal +!!$ end subroutine psb_i_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_scals(d,a,info) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_hdiag_scals +!!$ end subroutine psb_i_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function i_hdiag_get_fmt() result(res) + function i_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function i_hdiag_get_fmt + end function i_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine i_hdiag_free(a) + subroutine i_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine i_hdiag_free + end subroutine i_cuda_hdiag_free - subroutine i_hdiag_finalize(a) + subroutine i_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_i_hdiag_sparse_mat), intent(inout) :: a + type(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_i_hdia_sparse_mat%free() return - end subroutine i_hdiag_finalize + end subroutine i_cuda_hdiag_finalize #else interface - subroutine psb_i_hdiag_mold(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_mold(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hdiag_mold + end subroutine psb_i_cuda_hdiag_mold end interface #endif -end module psb_i_hdiag_mat_mod +end module psb_i_cuda_hdiag_mat_mod diff --git a/cuda/psb_i_hlg_mat_mod.F90 b/cuda/psb_i_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_i_hlg_mat_mod.F90 rename to cuda/psb_i_cuda_hlg_mat_mod.F90 index 2ec881ce..f97470d2 100644 --- a/cuda/psb_i_hlg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_i_hlg_mat_mod +module psb_i_cuda_hlg_mat_mod use iso_c_binding use psb_i_mat_mod @@ -41,7 +41,7 @@ module psb_i_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_hll_sparse_mat) :: psb_i_hlg_sparse_mat + type, extends(psb_i_hll_sparse_mat) :: psb_i_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_i_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => i_hlg_get_fmt - procedure, pass(a) :: sizeof => i_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_i_hlg_vect_mv - procedure, pass(a) :: csmm => psb_i_hlg_csmm - procedure, pass(a) :: csmv => psb_i_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_i_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_i_hlg_scals - procedure, pass(a) :: scalv => psb_i_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_i_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_hlg_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_i_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_i_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_hlg_from_fmt - procedure, pass(a) :: free => i_hlg_free - procedure, pass(a) :: mold => psb_i_hlg_mold - procedure, pass(a) :: is_host => i_hlg_is_host - procedure, pass(a) :: is_dev => i_hlg_is_dev - procedure, pass(a) :: is_sync => i_hlg_is_sync - procedure, pass(a) :: set_host => i_hlg_set_host - procedure, pass(a) :: set_dev => i_hlg_set_dev - procedure, pass(a) :: set_sync => i_hlg_set_sync - procedure, pass(a) :: sync => i_hlg_sync - procedure, pass(a) :: from_gpu => psb_i_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_i_hlg_to_gpu - final :: i_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => i_cuda_hlg_free + procedure, pass(a) :: mold => psb_i_cuda_hlg_mold + procedure, pass(a) :: is_host => i_cuda_hlg_is_host + procedure, pass(a) :: is_dev => i_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => i_cuda_hlg_is_sync + procedure, pass(a) :: set_host => i_cuda_hlg_set_host + procedure, pass(a) :: set_dev => i_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => i_cuda_hlg_set_sync + procedure, pass(a) :: sync => i_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_i_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_cuda_hlg_to_gpu + final :: i_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_i_hlg_mold + procedure, pass(a) :: mold => psb_i_cuda_hlg_mold #endif - end type psb_i_hlg_sparse_mat + end type psb_i_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: i_hlg_get_nzeros, i_hlg_free, i_hlg_get_fmt, & - & i_hlg_get_size, i_hlg_sizeof, i_hlg_get_nz_row + private :: i_cuda_hlg_get_nzeros, i_cuda_hlg_free, i_cuda_hlg_get_fmt, & + & i_cuda_hlg_get_size, i_cuda_hlg_sizeof, i_cuda_hlg_get_nz_row interface - subroutine psb_i_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_vect_mv + end subroutine psb_i_cuda_hlg_vect_mv end interface interface - subroutine psb_i_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_inner_vect_sv + end subroutine psb_i_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_i_hlg_reallocate_nz(nz,a) - import :: psb_i_hlg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hlg_reallocate_nz(nz,a) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_i_hlg_reallocate_nz + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_hlg_reallocate_nz end interface interface - subroutine psb_i_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_i_hlg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_hlg_allocate_mnnz + end subroutine psb_i_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_i_hlg_mold(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_mold(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_mold + end subroutine psb_i_cuda_hlg_mold end interface interface - subroutine psb_i_hlg_from_gpu(a,info) - import :: psb_i_hlg_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_from_gpu(a,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_from_gpu + end subroutine psb_i_cuda_hlg_from_gpu end interface interface - subroutine psb_i_hlg_to_gpu(a,info, nzrm) - import :: psb_i_hlg_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_hlg_to_gpu + end subroutine psb_i_cuda_hlg_to_gpu end interface interface - subroutine psb_i_cp_hlg_from_coo(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hlg_from_coo + end subroutine psb_i_cuda_cp_hlg_from_coo end interface interface - subroutine psb_i_cp_hlg_from_fmt(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hlg_from_fmt + end subroutine psb_i_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_i_mv_hlg_from_coo(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hlg_from_coo + end subroutine psb_i_cuda_mv_hlg_from_coo end interface interface - subroutine psb_i_mv_hlg_from_fmt(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hlg_from_fmt + end subroutine psb_i_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_i_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_csmv + end subroutine psb_i_cuda_hlg_csmv end interface interface - subroutine psb_i_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_csmm + end subroutine psb_i_cuda_hlg_csmm end interface interface - subroutine psb_i_hlg_scal(d,a,info, side) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_scal(d,a,info, side) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_hlg_scal + end subroutine psb_i_cuda_hlg_scal end interface interface - subroutine psb_i_hlg_scals(d,a,info) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_scals(d,a,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_scals + end subroutine psb_i_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function i_hlg_sizeof(a) result(res) + function i_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_hlg_sizeof + end function i_cuda_hlg_sizeof - function i_hlg_get_fmt() result(res) + function i_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function i_hlg_get_fmt + end function i_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine i_hlg_free(a) + subroutine i_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine i_hlg_free + end subroutine i_cuda_hlg_free - subroutine i_hlg_sync(a) + subroutine i_cuda_hlg_sync(a) implicit none - class(psb_i_hlg_sparse_mat), target, intent(in) :: a - class(psb_i_hlg_sparse_mat), pointer :: tmpa + class(psb_i_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine i_hlg_sync + end subroutine i_cuda_hlg_sync - subroutine i_hlg_set_host(a) + subroutine i_cuda_hlg_set_host(a) implicit none - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine i_hlg_set_host + end subroutine i_cuda_hlg_set_host - subroutine i_hlg_set_dev(a) + subroutine i_cuda_hlg_set_dev(a) implicit none - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine i_hlg_set_dev + end subroutine i_cuda_hlg_set_dev - subroutine i_hlg_set_sync(a) + subroutine i_cuda_hlg_set_sync(a) implicit none - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine i_hlg_set_sync + end subroutine i_cuda_hlg_set_sync - function i_hlg_is_dev(a) result(res) + function i_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function i_hlg_is_dev + end function i_cuda_hlg_is_dev - function i_hlg_is_host(a) result(res) + function i_cuda_hlg_is_host(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function i_hlg_is_host + end function i_cuda_hlg_is_host - function i_hlg_is_sync(a) result(res) + function i_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function i_hlg_is_sync + end function i_cuda_hlg_is_sync - subroutine i_hlg_finalize(a) + subroutine i_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_i_hlg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_hlg_finalize + end subroutine i_cuda_hlg_finalize #else interface - subroutine psb_i_hlg_mold(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_mold(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_mold + end subroutine psb_i_cuda_hlg_mold end interface #endif -end module psb_i_hlg_mat_mod +end module psb_i_cuda_hlg_mat_mod diff --git a/cuda/psb_i_hybg_mat_mod.F90 b/cuda/psb_i_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_i_hybg_mat_mod.F90 rename to cuda/psb_i_cuda_hybg_mat_mod.F90 index 388a8801..10333c24 100644 --- a/cuda/psb_i_hybg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_i_hybg_mat_mod +module psb_i_cuda_hybg_mat_mod use iso_c_binding use psb_i_mat_mod use cusparse_mod - type, extends(psb_i_csr_sparse_mat) :: psb_i_hybg_sparse_mat + type, extends(psb_i_csr_sparse_mat) :: psb_i_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_i_hybg_mat_mod type(i_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => i_hybg_get_fmt - procedure, pass(a) :: sizeof => i_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_i_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_i_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_i_hybg_csmm - procedure, pass(a) :: csmv => psb_i_hybg_csmv - procedure, pass(a) :: scals => psb_i_hybg_scals - procedure, pass(a) :: scalv => psb_i_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_i_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_hybg_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_i_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_i_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_hybg_from_fmt - procedure, pass(a) :: free => i_hybg_free - procedure, pass(a) :: mold => psb_i_hybg_mold - procedure, pass(a) :: to_gpu => psb_i_hybg_to_gpu - final :: i_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => i_cuda_hybg_free + procedure, pass(a) :: mold => psb_i_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_hybg_to_gpu + final :: i_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_i_hybg_mold + procedure, pass(a) :: mold => psb_i_cuda_hybg_mold #endif - end type psb_i_hybg_sparse_mat + end type psb_i_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: i_hybg_get_nzeros, i_hybg_free, i_hybg_get_fmt, & - & i_hybg_get_size, i_hybg_sizeof, i_hybg_get_nz_row + private :: i_cuda_hybg_get_nzeros, i_cuda_hybg_free, i_cuda_hybg_get_fmt, & + & i_cuda_hybg_get_size, i_cuda_hybg_sizeof, i_cuda_hybg_get_nz_row interface - subroutine psb_i_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_inner_vect_sv + end subroutine psb_i_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_i_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_vect_mv + end subroutine psb_i_cuda_hybg_vect_mv end interface interface - subroutine psb_i_hybg_reallocate_nz(nz,a) - import :: psb_i_hybg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hybg_reallocate_nz(nz,a) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_i_hybg_reallocate_nz + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_hybg_reallocate_nz end interface interface - subroutine psb_i_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_i_hybg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_hybg_allocate_mnnz + end subroutine psb_i_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_i_hybg_mold(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_mold(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hybg_mold + end subroutine psb_i_cuda_hybg_mold end interface interface - subroutine psb_i_hybg_to_gpu(a,info, nzrm) - import :: psb_i_hybg_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_hybg_to_gpu + end subroutine psb_i_cuda_hybg_to_gpu end interface interface - subroutine psb_i_cp_hybg_from_coo(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hybg_from_coo + end subroutine psb_i_cuda_cp_hybg_from_coo end interface interface - subroutine psb_i_cp_hybg_from_fmt(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hybg_from_fmt + end subroutine psb_i_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_i_mv_hybg_from_coo(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hybg_from_coo + end subroutine psb_i_cuda_mv_hybg_from_coo end interface interface - subroutine psb_i_mv_hybg_from_fmt(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hybg_from_fmt + end subroutine psb_i_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_i_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_csmv + end subroutine psb_i_cuda_hybg_csmv end interface interface - subroutine psb_i_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_csmm + end subroutine psb_i_cuda_hybg_csmm end interface interface - subroutine psb_i_hybg_scal(d,a,info,side) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hybg_scal(d,a,info,side) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_hybg_scal + end subroutine psb_i_cuda_hybg_scal end interface interface - subroutine psb_i_hybg_scals(d,a,info) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hybg_scals(d,a,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hybg_scals + end subroutine psb_i_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function i_hybg_sizeof(a) result(res) + function i_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_i_hybg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + psb_sizeof_ip * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_hybg_sizeof + end function i_cuda_hybg_sizeof - function i_hybg_get_fmt() result(res) + function i_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function i_hybg_get_fmt + end function i_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine i_hybg_free(a) + subroutine i_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_i_csr_sparse_mat%free() return - end subroutine i_hybg_free + end subroutine i_cuda_hybg_free - subroutine i_hybg_finalize(a) + subroutine i_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_i_hybg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine i_hybg_finalize + end subroutine i_cuda_hybg_finalize #else interface - subroutine psb_i_hybg_mold(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_mold(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hybg_mold + end subroutine psb_i_cuda_hybg_mold end interface #endif -end module psb_i_hybg_mat_mod +end module psb_i_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_i_gpu_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_i_gpu_vect_mod.F90 rename to cuda/psb_i_cuda_vect_mod.F90 index ca4950a0..8d940513 100644 --- a/cuda/psb_i_gpu_vect_mod.F90 +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_i_gpu_vect_mod +module psb_i_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod + use psb_cuda_env_mod use psb_i_vectordev_mod #endif @@ -44,7 +44,7 @@ module psb_i_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_base_vect_type) :: psb_i_vect_gpu + type, extends(psb_i_base_vect_type) :: psb_i_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -56,52 +56,52 @@ module psb_i_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => i_gpu_get_nrows - procedure, nopass :: get_fmt => i_gpu_get_fmt - - procedure, pass(x) :: all => i_gpu_all - procedure, pass(x) :: zero => i_gpu_zero - procedure, pass(x) :: asb_m => i_gpu_asb_m - procedure, pass(x) :: sync => i_gpu_sync - procedure, pass(x) :: sync_space => i_gpu_sync_space - procedure, pass(x) :: bld_x => i_gpu_bld_x - procedure, pass(x) :: bld_mn => i_gpu_bld_mn - procedure, pass(x) :: free => i_gpu_free - procedure, pass(x) :: ins_a => i_gpu_ins_a - procedure, pass(x) :: ins_v => i_gpu_ins_v - procedure, pass(x) :: is_host => i_gpu_is_host - procedure, pass(x) :: is_dev => i_gpu_is_dev - procedure, pass(x) :: is_sync => i_gpu_is_sync - procedure, pass(x) :: set_host => i_gpu_set_host - procedure, pass(x) :: set_dev => i_gpu_set_dev - procedure, pass(x) :: set_sync => i_gpu_set_sync - procedure, pass(x) :: set_scal => i_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => i_gpu_set_vect - procedure, pass(x) :: gthzv_x => i_gpu_gthzv_x - procedure, pass(y) :: sctb => i_gpu_sctb - procedure, pass(y) :: sctb_x => i_gpu_sctb_x - procedure, pass(x) :: gthzbuf => i_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => i_gpu_sctb_buf - procedure, pass(x) :: new_buffer => i_gpu_new_buffer - procedure, nopass :: device_wait => i_gpu_device_wait - procedure, pass(x) :: free_buffer => i_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => i_gpu_maybe_free_buffer - - final :: i_gpu_vect_finalize + procedure, pass(x) :: get_nrows => i_cuda_get_nrows + procedure, nopass :: get_fmt => i_cuda_get_fmt + + procedure, pass(x) :: all => i_cuda_all + procedure, pass(x) :: zero => i_cuda_zero + procedure, pass(x) :: asb_m => i_cuda_asb_m + procedure, pass(x) :: sync => i_cuda_sync + procedure, pass(x) :: sync_space => i_cuda_sync_space + procedure, pass(x) :: bld_x => i_cuda_bld_x + procedure, pass(x) :: bld_mn => i_cuda_bld_mn + procedure, pass(x) :: free => i_cuda_free + procedure, pass(x) :: ins_a => i_cuda_ins_a + procedure, pass(x) :: ins_v => i_cuda_ins_v + procedure, pass(x) :: is_host => i_cuda_is_host + procedure, pass(x) :: is_dev => i_cuda_is_dev + procedure, pass(x) :: is_sync => i_cuda_is_sync + procedure, pass(x) :: set_host => i_cuda_set_host + procedure, pass(x) :: set_dev => i_cuda_set_dev + procedure, pass(x) :: set_sync => i_cuda_set_sync + procedure, pass(x) :: set_scal => i_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => i_cuda_set_vect + procedure, pass(x) :: gthzv_x => i_cuda_gthzv_x + procedure, pass(y) :: sctb => i_cuda_sctb + procedure, pass(y) :: sctb_x => i_cuda_sctb_x + procedure, pass(x) :: gthzbuf => i_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => i_cuda_sctb_buf + procedure, pass(x) :: new_buffer => i_cuda_new_buffer + procedure, nopass :: device_wait => i_cuda_device_wait + procedure, pass(x) :: free_buffer => i_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => i_cuda_maybe_free_buffer + + final :: i_cuda_vect_finalize #endif - end type psb_i_vect_gpu + end type psb_i_vect_cuda - public :: psb_i_vect_gpu_ + public :: psb_i_vect_cuda_ private :: constructor - interface psb_i_vect_gpu_ + interface psb_i_vect_cuda_ module procedure constructor - end interface psb_i_vect_gpu_ + end interface psb_i_vect_cuda_ contains function constructor(x) result(this) integer(psb_ipk_) :: x(:) - type(psb_i_vect_gpu) :: this + type(psb_i_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -111,20 +111,20 @@ contains #ifdef HAVE_SPGPU - subroutine i_gpu_device_wait() + subroutine i_cuda_device_wait() call psb_cudaSync() - end subroutine i_gpu_device_wait + end subroutine i_cuda_device_wait - subroutine i_gpu_new_buffer(n,x,info) + subroutine i_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -314,14 +314,14 @@ contains end select - end subroutine i_gpu_gthzv_x + end subroutine i_cuda_gthzv_x - subroutine i_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine i_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_i_vect_gpu) :: x + class(psb_i_vect_cuda) :: x integer :: info, ni info = 0 @@ -332,11 +332,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -367,14 +367,14 @@ contains end select - end subroutine i_gpu_gthzbuf + end subroutine i_cuda_gthzbuf - subroutine i_gpu_sctb(n,idx,x,beta,y) + subroutine i_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_gpu) :: y + class(psb_i_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -384,24 +384,24 @@ contains call y%psb_i_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine i_gpu_sctb + end subroutine i_cuda_sctb - subroutine i_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine i_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_gpu) :: y + class(psb_i_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -489,16 +489,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine i_gpu_sctb_x + end subroutine i_cuda_sctb_x - subroutine i_gpu_sctb_buf(i,n,idx,beta,y) + subroutine i_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta - class(psb_i_vect_gpu) :: y + class(psb_i_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -509,11 +509,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -540,106 +540,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine i_gpu_sctb_buf + end subroutine i_cuda_sctb_buf - subroutine i_gpu_bld_x(x,this) + subroutine i_cuda_bld_x(x,this) use psb_base_mod integer(psb_ipk_), intent(in) :: this(:) - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_bld_x',& + call psb_errpush(info,'i_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine i_gpu_bld_x + end subroutine i_cuda_bld_x - subroutine i_gpu_bld_mn(x,n) + subroutine i_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'i_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'i_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine i_gpu_bld_mn + end subroutine i_cuda_bld_mn - subroutine i_gpu_set_host(x) + subroutine i_cuda_set_host(x) implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine i_gpu_set_host + end subroutine i_cuda_set_host - subroutine i_gpu_set_dev(x) + subroutine i_cuda_set_dev(x) implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine i_gpu_set_dev + end subroutine i_cuda_set_dev - subroutine i_gpu_set_sync(x) + subroutine i_cuda_set_sync(x) implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine i_gpu_set_sync + end subroutine i_cuda_set_sync - function i_gpu_is_dev(x) result(res) + function i_cuda_is_dev(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function i_gpu_is_dev + end function i_cuda_is_dev - function i_gpu_is_host(x) result(res) + function i_cuda_is_host(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function i_gpu_is_host + end function i_cuda_is_host - function i_gpu_is_sync(x) result(res) + function i_cuda_is_sync(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function i_gpu_is_sync + end function i_cuda_is_sync - function i_gpu_get_nrows(x) result(res) + function i_cuda_get_nrows(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function i_gpu_get_nrows + end function i_cuda_get_nrows - function i_gpu_get_fmt() result(res) + function i_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'iGPU' - end function i_gpu_get_fmt + end function i_cuda_get_fmt - subroutine i_gpu_all(n, x, info) + subroutine i_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_i_vect_gpu), intent(out) :: x + class(psb_i_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -647,26 +647,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_all',& + call psb_errpush(info,'i_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine i_gpu_all + end subroutine i_cuda_all - subroutine i_gpu_zero(x) + subroutine i_cuda_zero(x) use psi_serial_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=izero call x%set_host() - end subroutine i_gpu_zero + end subroutine i_cuda_zero - subroutine i_gpu_asb_m(n, x, info) + subroutine i_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -686,12 +686,12 @@ contains end if end if - end subroutine i_gpu_asb_m + end subroutine i_cuda_asb_m - subroutine i_gpu_sync_space(x,info) + subroutine i_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -730,12 +730,12 @@ contains end if end if - end subroutine i_gpu_sync_space + end subroutine i_cuda_sync_space - subroutine i_gpu_sync(x) + subroutine i_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -761,31 +761,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'i_gpu_sync') + call psb_errpush(info,'i_cuda_sync') end if - end subroutine i_gpu_sync + end subroutine i_cuda_sync - subroutine i_gpu_free(x, info) + subroutine i_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine i_gpu_free + end subroutine i_cuda_free - subroutine i_gpu_set_scal(x,val,first,last) - class(psb_i_vect_gpu), intent(inout) :: x + subroutine i_cuda_set_scal(x,val,first,last) + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -800,10 +800,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine i_gpu_set_scal + end subroutine i_cuda_set_scal !!$ -!!$ subroutine i_gpu_set_vect(x,val) -!!$ class(psb_i_vect_gpu), intent(inout) :: x +!!$ subroutine i_cuda_set_vect(x,val) +!!$ class(psb_i_vect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -812,68 +812,68 @@ contains !!$ call x%psb_i_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine i_gpu_set_vect +!!$ end subroutine i_cuda_set_vect - subroutine i_gpu_vect_finalize(x) + subroutine i_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_i_vect_gpu), intent(inout) :: x + type(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine i_gpu_vect_finalize + end subroutine i_cuda_vect_finalize - subroutine i_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine i_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_i_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceInt(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine i_gpu_ins_v + end subroutine i_cuda_ins_v - subroutine i_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine i_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) integer(psb_ipk_), intent(in) :: val(:) @@ -886,11 +886,11 @@ contains call x%psb_i_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine i_gpu_ins_a + end subroutine i_cuda_ins_a #endif -end module psb_i_gpu_vect_mod +end module psb_i_cuda_vect_mod ! @@ -899,7 +899,7 @@ end module psb_i_gpu_vect_mod -module psb_i_gpu_multivect_mod +module psb_i_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -914,7 +914,7 @@ module psb_i_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_base_multivect_type) :: psb_i_multivect_gpu + type, extends(psb_i_base_multivect_type) :: psb_i_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -922,48 +922,48 @@ module psb_i_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => i_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => i_gpu_multi_get_ncols - procedure, nopass :: get_fmt => i_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => i_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => i_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => i_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => i_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => i_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => i_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => i_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => i_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => i_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => i_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => i_gpu_multi_amax -!!$ procedure, pass(x) :: asum => i_gpu_multi_asum - procedure, pass(x) :: all => i_gpu_multi_all - procedure, pass(x) :: zero => i_gpu_multi_zero - procedure, pass(x) :: asb => i_gpu_multi_asb - procedure, pass(x) :: sync => i_gpu_multi_sync - procedure, pass(x) :: sync_space => i_gpu_multi_sync_space - procedure, pass(x) :: bld_x => i_gpu_multi_bld_x - procedure, pass(x) :: bld_n => i_gpu_multi_bld_n - procedure, pass(x) :: free => i_gpu_multi_free - procedure, pass(x) :: ins => i_gpu_multi_ins - procedure, pass(x) :: is_host => i_gpu_multi_is_host - procedure, pass(x) :: is_dev => i_gpu_multi_is_dev - procedure, pass(x) :: is_sync => i_gpu_multi_is_sync - procedure, pass(x) :: set_host => i_gpu_multi_set_host - procedure, pass(x) :: set_dev => i_gpu_multi_set_dev - procedure, pass(x) :: set_sync => i_gpu_multi_set_sync - procedure, pass(x) :: set_scal => i_gpu_multi_set_scal - procedure, pass(x) :: set_vect => i_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => i_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => i_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => i_gpu_multi_sctb_x - final :: i_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => i_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => i_cuda_multi_get_ncols + procedure, nopass :: get_fmt => i_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => i_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => i_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => i_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => i_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => i_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => i_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => i_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => i_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => i_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => i_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => i_cuda_multi_amax +!!$ procedure, pass(x) :: asum => i_cuda_multi_asum + procedure, pass(x) :: all => i_cuda_multi_all + procedure, pass(x) :: zero => i_cuda_multi_zero + procedure, pass(x) :: asb => i_cuda_multi_asb + procedure, pass(x) :: sync => i_cuda_multi_sync + procedure, pass(x) :: sync_space => i_cuda_multi_sync_space + procedure, pass(x) :: bld_x => i_cuda_multi_bld_x + procedure, pass(x) :: bld_n => i_cuda_multi_bld_n + procedure, pass(x) :: free => i_cuda_multi_free + procedure, pass(x) :: ins => i_cuda_multi_ins + procedure, pass(x) :: is_host => i_cuda_multi_is_host + procedure, pass(x) :: is_dev => i_cuda_multi_is_dev + procedure, pass(x) :: is_sync => i_cuda_multi_is_sync + procedure, pass(x) :: set_host => i_cuda_multi_set_host + procedure, pass(x) :: set_dev => i_cuda_multi_set_dev + procedure, pass(x) :: set_sync => i_cuda_multi_set_sync + procedure, pass(x) :: set_scal => i_cuda_multi_set_scal + procedure, pass(x) :: set_vect => i_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => i_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => i_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => i_cuda_multi_sctb_x + final :: i_cuda_multi_vect_finalize #endif - end type psb_i_multivect_gpu + end type psb_i_multivect_cuda - public :: psb_i_multivect_gpu + public :: psb_i_multivect_cuda private :: constructor - interface psb_i_multivect_gpu + interface psb_i_multivect_cuda module procedure constructor end interface @@ -971,7 +971,7 @@ contains function constructor(x) result(this) integer(psb_ipk_) :: x(:,:) - type(psb_i_multivect_gpu) :: this + type(psb_i_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -981,15 +981,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine i_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ integer(psb_ipk_) :: y(:) -!!$ class(psb_i_multivect_gpu) :: x +!!$ class(psb_i_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1014,16 +1014,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine i_gpu_multi_gthzv_x +!!$ end subroutine i_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine i_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine i_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ integer(psb_ipk_) :: beta, x(:) -!!$ class(psb_i_multivect_gpu) :: y +!!$ class(psb_i_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1033,17 +1033,17 @@ contains !!$ call y%psb_i_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine i_gpu_multi_sctb +!!$ end subroutine i_cuda_multi_sctb !!$ -!!$ subroutine i_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine i_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ integer(psb_ipk_) :: beta, x(:) -!!$ class(psb_i_multivect_gpu) :: y +!!$ class(psb_i_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1069,13 +1069,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine i_gpu_multi_sctb_x +!!$ end subroutine i_cuda_multi_sctb_x - subroutine i_gpu_multi_bld_x(x,this) + subroutine i_cuda_multi_bld_x(x,this) use psb_base_mod integer(psb_ipk_), intent(in) :: this(:,:) - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1085,101 +1085,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_multi_bld_x',& + call psb_errpush(info,'i_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine i_gpu_multi_bld_x + end subroutine i_cuda_multi_bld_x - subroutine i_gpu_multi_bld_n(x,m,n) + subroutine i_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'i_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'i_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine i_gpu_multi_bld_n + end subroutine i_cuda_multi_bld_n - subroutine i_gpu_multi_set_host(x) + subroutine i_cuda_multi_set_host(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine i_gpu_multi_set_host + end subroutine i_cuda_multi_set_host - subroutine i_gpu_multi_set_dev(x) + subroutine i_cuda_multi_set_dev(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine i_gpu_multi_set_dev + end subroutine i_cuda_multi_set_dev - subroutine i_gpu_multi_set_sync(x) + subroutine i_cuda_multi_set_sync(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine i_gpu_multi_set_sync + end subroutine i_cuda_multi_set_sync - function i_gpu_multi_is_dev(x) result(res) + function i_cuda_multi_is_dev(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function i_gpu_multi_is_dev + end function i_cuda_multi_is_dev - function i_gpu_multi_is_host(x) result(res) + function i_cuda_multi_is_host(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function i_gpu_multi_is_host + end function i_cuda_multi_is_host - function i_gpu_multi_is_sync(x) result(res) + function i_cuda_multi_is_sync(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function i_gpu_multi_is_sync + end function i_cuda_multi_is_sync - function i_gpu_multi_get_nrows(x) result(res) + function i_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function i_gpu_multi_get_nrows + end function i_cuda_multi_get_nrows - function i_gpu_multi_get_ncols(x) result(res) + function i_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function i_gpu_multi_get_ncols + end function i_cuda_multi_get_ncols - function i_gpu_multi_get_fmt() result(res) + function i_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'iGPU' - end function i_gpu_multi_get_fmt + end function i_cuda_multi_get_fmt -!!$ function i_gpu_multi_dot_v(n,x,y) result(res) +!!$ function i_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ class(psb_i_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res @@ -1196,13 +1196,13 @@ contains !!$ type is (psb_i_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'i_gpu_multi_dot_v') +!!$ call psb_errpush(info,'i_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1211,11 +1211,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function i_gpu_multi_dot_v +!!$ end function i_cuda_multi_dot_v !!$ -!!$ function i_gpu_multi_dot_a(n,x,y) result(res) +!!$ function i_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res @@ -1224,14 +1224,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function i_gpu_multi_dot_a +!!$ end function i_cuda_multi_dot_a !!$ -!!$ subroutine i_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine i_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_i_base_multivect_type), intent(inout) :: x -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1244,7 +1244,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1263,27 +1263,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine i_gpu_multi_axpby_v +!!$ end subroutine i_cuda_multi_axpby_v !!$ -!!$ subroutine i_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine i_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ integer(psb_ipk_), intent(in) :: x(:) -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine i_gpu_multi_axpby_a +!!$ end subroutine i_cuda_multi_axpby_a !!$ -!!$ subroutine i_gpu_multi_mlt_v(x, y, info) +!!$ subroutine i_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_i_base_multivect_type), intent(inout) :: x -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1297,7 +1297,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1309,13 +1309,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine i_gpu_multi_mlt_v +!!$ end subroutine i_cuda_multi_mlt_v !!$ -!!$ subroutine i_gpu_multi_mlt_a(x, y, info) +!!$ subroutine i_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: x(:) -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1323,15 +1323,15 @@ contains !!$ call y%sync() !!$ call y%psb_i_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine i_gpu_multi_mlt_a +!!$ end subroutine i_cuda_multi_mlt_a !!$ -!!$ subroutine i_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine i_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: alpha,beta !!$ integer(psb_ipk_), intent(in) :: x(:) !!$ integer(psb_ipk_), intent(in) :: y(:) -!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ class(psb_i_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1339,16 +1339,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine i_gpu_multi_mlt_a_2 +!!$ end subroutine i_cuda_multi_mlt_a_2 !!$ -!!$ subroutine i_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine i_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: alpha,beta !!$ class(psb_i_base_multivect_type), intent(inout) :: x !!$ class(psb_i_base_multivect_type), intent(inout) :: y -!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ class(psb_i_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1371,9 +1371,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1393,11 +1393,11 @@ contains !!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine i_gpu_multi_mlt_v_2 +!!$ end subroutine i_cuda_multi_mlt_v_2 - subroutine i_gpu_multi_set_scal(x,val) - class(psb_i_multivect_gpu), intent(inout) :: x + subroutine i_cuda_multi_set_scal(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1405,10 +1405,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_i_base_multivect_type%set_scal(val) call x%set_host() - end subroutine i_gpu_multi_set_scal + end subroutine i_cuda_multi_set_scal - subroutine i_gpu_multi_set_vect(x,val) - class(psb_i_multivect_gpu), intent(inout) :: x + subroutine i_cuda_multi_set_vect(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1417,24 +1417,24 @@ contains call x%psb_i_base_multivect_type%set_vect(val) call x%set_host() - end subroutine i_gpu_multi_set_vect + end subroutine i_cuda_multi_set_vect -!!$ subroutine i_gpu_multi_scal(alpha, x) +!!$ subroutine i_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_i_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine i_gpu_multi_scal +!!$ end subroutine i_cuda_multi_scal !!$ !!$ -!!$ function i_gpu_multi_nrm2(n,x) result(res) +!!$ function i_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res !!$ integer(psb_ipk_) :: info @@ -1442,36 +1442,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function i_gpu_multi_nrm2 +!!$ end function i_cuda_multi_nrm2 !!$ -!!$ function i_gpu_multi_amax(n,x) result(res) +!!$ function i_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function i_gpu_multi_amax +!!$ end function i_cuda_multi_amax !!$ -!!$ function i_gpu_multi_asum(n,x) result(res) +!!$ function i_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function i_gpu_multi_asum +!!$ end function i_cuda_multi_asum - subroutine i_gpu_multi_all(m,n, x, info) + subroutine i_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_i_multivect_gpu), intent(out) :: x + class(psb_i_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=izero) @@ -1481,26 +1481,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_multi_all',& + call psb_errpush(info,'i_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine i_gpu_multi_all + end subroutine i_cuda_multi_all - subroutine i_gpu_multi_zero(x) + subroutine i_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine i_gpu_multi_zero + end subroutine i_cuda_multi_zero - subroutine i_gpu_multi_asb(m,n, x, info) + subroutine i_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1520,12 +1520,12 @@ contains call x%set_host() end if end if - end subroutine i_gpu_multi_asb + end subroutine i_cuda_multi_asb - subroutine i_gpu_multi_sync_space(x,info) + subroutine i_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1578,11 +1578,11 @@ contains end if - end subroutine i_gpu_multi_sync_space + end subroutine i_cuda_multi_sync_space - subroutine i_gpu_multi_sync(x) + subroutine i_cuda_multi_sync(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1598,16 +1598,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'i_gpu_multi_sync') + call psb_errpush(info,'i_cuda_multi_sync') end if - end subroutine i_gpu_multi_sync + end subroutine i_cuda_multi_sync - subroutine i_gpu_multi_free(x, info) + subroutine i_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1622,13 +1622,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine i_gpu_multi_free + end subroutine i_cuda_multi_free - subroutine i_gpu_multi_vect_finalize(x) + subroutine i_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_i_multivect_gpu), intent(inout) :: x + type(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1643,12 +1643,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine i_gpu_multi_vect_finalize + end subroutine i_cuda_multi_vect_finalize - subroutine i_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine i_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) integer(psb_ipk_), intent(in) :: val(:,:) @@ -1661,11 +1661,11 @@ contains call x%psb_i_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine i_gpu_multi_ins + end subroutine i_cuda_multi_ins #endif -end module psb_i_gpu_multivect_mod +end module psb_i_cuda_multivect_mod diff --git a/cuda/psb_s_csrg_mat_mod.F90 b/cuda/psb_s_csrg_mat_mod.F90 deleted file mode 100644 index cface9f5..00000000 --- a/cuda/psb_s_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_s_csrg_mat_mod - - use iso_c_binding - use psb_s_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_s_csr_sparse_mat) :: psb_s_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(s_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => s_csrg_get_fmt - procedure, pass(a) :: sizeof => s_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_s_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_s_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_csrg_csmm - procedure, pass(a) :: csmv => psb_s_csrg_csmv - procedure, pass(a) :: scals => psb_s_csrg_scals - procedure, pass(a) :: scalv => psb_s_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_s_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_csrg_from_fmt - procedure, pass(a) :: free => s_csrg_free - procedure, pass(a) :: mold => psb_s_csrg_mold - procedure, pass(a) :: is_host => s_csrg_is_host - procedure, pass(a) :: is_dev => s_csrg_is_dev - procedure, pass(a) :: is_sync => s_csrg_is_sync - procedure, pass(a) :: set_host => s_csrg_set_host - procedure, pass(a) :: set_dev => s_csrg_set_dev - procedure, pass(a) :: set_sync => s_csrg_set_sync - procedure, pass(a) :: sync => s_csrg_sync - procedure, pass(a) :: to_gpu => psb_s_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_s_csrg_from_gpu - final :: s_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_csrg_mold -#endif - end type psb_s_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: s_csrg_get_nzeros, s_csrg_free, s_csrg_get_fmt, & - & s_csrg_get_size, s_csrg_sizeof, s_csrg_get_nz_row - - - interface - subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_vect_mv - end interface - - interface - subroutine psb_s_csrg_reallocate_nz(nz,a) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_s_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_s_csrg_reallocate_nz - end interface - - interface - subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_s_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_csrg_allocate_mnnz - end interface - - interface - subroutine psb_s_csrg_mold(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_mold - end interface - - interface - subroutine psb_s_csrg_to_gpu(a,info, nzrm) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_csrg_to_gpu - end interface - - interface - subroutine psb_s_csrg_from_gpu(a,info) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_from_gpu - end interface - - interface - subroutine psb_s_cp_csrg_from_coo(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_csrg_from_coo - end interface - - interface - subroutine psb_s_cp_csrg_from_fmt(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_csrg_from_fmt - end interface - - interface - subroutine psb_s_mv_csrg_from_coo(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_csrg_from_coo - end interface - - interface - subroutine psb_s_mv_csrg_from_fmt(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_csrg_from_fmt - end interface - - interface - subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:) - real(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_csmv - end interface - interface - subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:,:) - real(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_csmm - end interface - - interface - subroutine psb_s_csrg_scal(d,a,info,side) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - real(psb_spk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_s_csrg_scal - end interface - - interface - subroutine psb_s_csrg_scals(d,a,info) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - real(psb_spk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function s_csrg_sizeof(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + psb_sizeof_sp * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function s_csrg_sizeof - - function s_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function s_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine s_csrg_set_host(a) - implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine s_csrg_set_host - - subroutine s_csrg_set_dev(a) - implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine s_csrg_set_dev - - subroutine s_csrg_set_sync(a) - implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine s_csrg_set_sync - - function s_csrg_is_dev(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function s_csrg_is_dev - - function s_csrg_is_host(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function s_csrg_is_host - - function s_csrg_is_sync(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function s_csrg_is_sync - - - subroutine s_csrg_sync(a) - implicit none - class(psb_s_csrg_sparse_mat), target, intent(in) :: a - class(psb_s_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine s_csrg_sync - - subroutine s_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_s_csr_sparse_mat%free() - - return - - end subroutine s_csrg_free - - subroutine s_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_s_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine s_csrg_finalize - -#else - interface - subroutine psb_s_csrg_mold(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_mold - end interface - -#endif - -end module psb_s_csrg_mat_mod diff --git a/cuda/psb_s_cuda_csrg_mat_mod.F90 b/cuda/psb_s_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..fb13d034 --- /dev/null +++ b/cuda/psb_s_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_cuda_csrg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_csr_sparse_mat) :: psb_s_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(s_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_s_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_s_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => s_cuda_csrg_free + procedure, pass(a) :: mold => psb_s_cuda_csrg_mold + procedure, pass(a) :: is_host => s_cuda_csrg_is_host + procedure, pass(a) :: is_dev => s_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => s_cuda_csrg_is_sync + procedure, pass(a) :: set_host => s_cuda_csrg_set_host + procedure, pass(a) :: set_dev => s_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => s_cuda_csrg_set_sync + procedure, pass(a) :: sync => s_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_s_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_s_cuda_csrg_from_gpu + final :: s_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_cuda_csrg_mold +#endif + end type psb_s_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_cuda_csrg_get_nzeros, s_cuda_csrg_free, s_cuda_csrg_get_fmt, & + & s_cuda_csrg_get_size, s_cuda_csrg_sizeof, s_cuda_csrg_get_nz_row + + + interface + subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_s_cuda_csrg_mold(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_mold + end interface + + interface + subroutine psb_s_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_s_cuda_csrg_from_gpu(a,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_csmv + end interface + interface + subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_csmm + end interface + + interface + subroutine psb_s_cuda_csrg_scal(d,a,info,side) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_cuda_csrg_scal + end interface + + interface + subroutine psb_s_cuda_csrg_scals(d,a,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_cuda_csrg_sizeof + + function s_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function s_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine s_cuda_csrg_set_host(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_cuda_csrg_set_host + + subroutine s_cuda_csrg_set_dev(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_cuda_csrg_set_dev + + subroutine s_cuda_csrg_set_sync(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_cuda_csrg_set_sync + + function s_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_cuda_csrg_is_dev + + function s_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_cuda_csrg_is_host + + function s_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_cuda_csrg_is_sync + + + subroutine s_cuda_csrg_sync(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_cuda_csrg_sync + + subroutine s_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_cuda_csrg_free + + subroutine s_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine s_cuda_csrg_finalize + +#else + interface + subroutine psb_s_cuda_csrg_mold(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_mold + end interface + +#endif + +end module psb_s_cuda_csrg_mat_mod diff --git a/cuda/psb_s_diag_mat_mod.F90 b/cuda/psb_s_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_s_diag_mat_mod.F90 rename to cuda/psb_s_cuda_diag_mat_mod.F90 index 1ed54f88..709cd728 100644 --- a/cuda/psb_s_diag_mat_mod.F90 +++ b/cuda/psb_s_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_s_diag_mat_mod +module psb_s_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_s_dia_mat_mod - type, extends(psb_s_dia_sparse_mat) :: psb_s_diag_sparse_mat + type, extends(psb_s_dia_sparse_mat) :: psb_s_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_s_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => s_diag_get_fmt - procedure, pass(a) :: sizeof => s_diag_sizeof - procedure, pass(a) :: vect_mv => psb_s_diag_vect_mv -! procedure, pass(a) :: csmm => psb_s_diag_csmm - procedure, pass(a) :: csmv => psb_s_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_s_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_s_diag_scals -! procedure, pass(a) :: scalv => psb_s_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_s_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_s_diag_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => s_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_s_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_s_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_s_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_s_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_s_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_s_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_s_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_s_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_s_mv_diag_from_fmt - procedure, pass(a) :: free => s_diag_free - procedure, pass(a) :: mold => psb_s_diag_mold - procedure, pass(a) :: to_gpu => psb_s_diag_to_gpu - final :: s_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => s_cuda_diag_free + procedure, pass(a) :: mold => psb_s_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_diag_to_gpu + final :: s_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_s_diag_mold + procedure, pass(a) :: mold => psb_s_cuda_diag_mold #endif - end type psb_s_diag_sparse_mat + end type psb_s_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: s_diag_get_nzeros, s_diag_free, s_diag_get_fmt, & - & s_diag_get_size, s_diag_sizeof, s_diag_get_nz_row + private :: s_cuda_diag_get_nzeros, s_cuda_diag_free, s_cuda_diag_get_fmt, & + & s_cuda_diag_get_size, s_cuda_diag_sizeof, s_cuda_diag_get_nz_row interface - subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_vect_mv + end subroutine psb_s_cuda_diag_vect_mv end interface interface - subroutine psb_s_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_inner_vect_sv + end subroutine psb_s_cuda_diag_inner_vect_sv end interface interface - subroutine psb_s_diag_reallocate_nz(nz,a) - import :: psb_s_diag_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_diag_reallocate_nz(nz,a) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_diag_sparse_mat), intent(inout) :: a - end subroutine psb_s_diag_reallocate_nz + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_diag_reallocate_nz end interface interface - subroutine psb_s_diag_allocate_mnnz(m,n,a,nz) - import :: psb_s_diag_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_diag_allocate_mnnz + end subroutine psb_s_cuda_diag_allocate_mnnz end interface interface - subroutine psb_s_diag_mold(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_mold(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_diag_mold + end subroutine psb_s_cuda_diag_mold end interface interface - subroutine psb_s_diag_to_gpu(a,info, nzrm) - import :: psb_s_diag_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_diag_to_gpu + end subroutine psb_s_cuda_diag_to_gpu end interface interface - subroutine psb_s_cp_diag_from_coo(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_diag_from_coo + end subroutine psb_s_cuda_cp_diag_from_coo end interface interface - subroutine psb_s_cp_diag_from_fmt(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_diag_from_fmt + end subroutine psb_s_cuda_cp_diag_from_fmt end interface interface - subroutine psb_s_mv_diag_from_coo(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_diag_from_coo + end subroutine psb_s_cuda_mv_diag_from_coo end interface interface - subroutine psb_s_mv_diag_from_fmt(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_diag_from_fmt + end subroutine psb_s_cuda_mv_diag_from_fmt end interface interface - subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_csmv + end subroutine psb_s_cuda_diag_csmv end interface interface - subroutine psb_s_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_csmm + end subroutine psb_s_cuda_diag_csmm end interface interface - subroutine psb_s_diag_scal(d,a,info, side) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_diag_scal(d,a,info, side) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_diag_scal + end subroutine psb_s_cuda_diag_scal end interface interface - subroutine psb_s_diag_scals(d,a,info) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_diag_scals(d,a,info) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_diag_scals + end subroutine psb_s_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function s_diag_sizeof(a) result(res) + function s_cuda_diag_sizeof(a) result(res) implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_diag_sizeof + end function s_cuda_diag_sizeof - function s_diag_get_fmt() result(res) + function s_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function s_diag_get_fmt + end function s_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine s_diag_free(a) + subroutine s_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine s_diag_free + end subroutine s_cuda_diag_free - subroutine s_diag_finalize(a) + subroutine s_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_s_diag_sparse_mat), intent(inout) :: a + type(psb_s_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_diag_finalize + end subroutine s_cuda_diag_finalize #else interface - subroutine psb_s_diag_mold(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_mold(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_diag_mold + end subroutine psb_s_cuda_diag_mold end interface #endif -end module psb_s_diag_mat_mod +end module psb_s_cuda_diag_mat_mod diff --git a/cuda/psb_s_dnsg_mat_mod.F90 b/cuda/psb_s_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_s_dnsg_mat_mod.F90 rename to cuda/psb_s_cuda_dnsg_mat_mod.F90 index 1c531463..b01c8365 100644 --- a/cuda/psb_s_dnsg_mat_mod.F90 +++ b/cuda/psb_s_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_s_dnsg_mat_mod +module psb_s_cuda_dnsg_mat_mod use iso_c_binding use psb_s_mat_mod use psb_s_dns_mat_mod use dnsdev_mod - type, extends(psb_s_dns_sparse_mat) :: psb_s_dnsg_sparse_mat + type, extends(psb_s_dns_sparse_mat) :: psb_s_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_s_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => s_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => s_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_s_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_s_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_s_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_s_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_s_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_s_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_s_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_s_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => s_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_s_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_s_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_s_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_s_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_s_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_s_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_s_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_s_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_dnsg_from_fmt - procedure, pass(a) :: free => s_dnsg_free - procedure, pass(a) :: mold => psb_s_dnsg_mold - procedure, pass(a) :: to_gpu => psb_s_dnsg_to_gpu - final :: s_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => s_cuda_dnsg_free + procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_dnsg_to_gpu + final :: s_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_s_dnsg_mold + procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold #endif - end type psb_s_dnsg_sparse_mat + end type psb_s_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: s_dnsg_get_nzeros, s_dnsg_free, s_dnsg_get_fmt, & - & s_dnsg_get_size, s_dnsg_get_nz_row + private :: s_cuda_dnsg_get_nzeros, s_cuda_dnsg_free, s_cuda_dnsg_get_fmt, & + & s_cuda_dnsg_get_size, s_cuda_dnsg_get_nz_row interface - subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_dnsg_vect_mv + end subroutine psb_s_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_s_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end subroutine psb_s_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_s_dnsg_mold(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_dnsg_mold(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_dnsg_mold + end subroutine psb_s_cuda_dnsg_mold end interface interface - subroutine psb_s_dnsg_to_gpu(a,info) - import :: psb_s_dnsg_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_dnsg_to_gpu(a,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_dnsg_to_gpu + end subroutine psb_s_cuda_dnsg_to_gpu end interface interface - subroutine psb_s_cp_dnsg_from_coo(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_dnsg_from_coo + end subroutine psb_s_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_s_cp_dnsg_from_fmt(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_dnsg_from_fmt + end subroutine psb_s_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_s_mv_dnsg_from_coo(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_dnsg_from_coo + end subroutine psb_s_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_s_mv_dnsg_from_fmt(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_dnsg_from_fmt + end subroutine psb_s_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_s_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta, x(:) !!$ real(psb_spk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_csmv +!!$ end subroutine psb_s_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_s_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_csmm +!!$ end subroutine psb_s_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_scal(d,a,info, side) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_s_dnsg_scal +!!$ end subroutine psb_s_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_scals(d,a,info) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_dnsg_scals +!!$ end subroutine psb_s_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function s_dnsg_get_fmt() result(res) + function s_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function s_dnsg_get_fmt + end function s_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine s_dnsg_free(a) + subroutine s_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine s_dnsg_free + end subroutine s_cuda_dnsg_free - subroutine s_dnsg_finalize(a) + subroutine s_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_s_dnsg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_dnsg_finalize + end subroutine s_cuda_dnsg_finalize #else interface - subroutine psb_s_dnsg_mold(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_dnsg_mold(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_dnsg_mold + end subroutine psb_s_cuda_dnsg_mold end interface #endif -end module psb_s_dnsg_mat_mod +end module psb_s_cuda_dnsg_mat_mod diff --git a/cuda/psb_s_elg_mat_mod.F90 b/cuda/psb_s_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_s_elg_mat_mod.F90 rename to cuda/psb_s_cuda_elg_mat_mod.F90 index 5c4eae9b..d6b003b5 100644 --- a/cuda/psb_s_elg_mat_mod.F90 +++ b/cuda/psb_s_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_s_elg_mat_mod +module psb_s_cuda_elg_mat_mod use iso_c_binding use psb_s_mat_mod use psb_s_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_ell_sparse_mat) :: psb_s_elg_sparse_mat + type, extends(psb_s_ell_sparse_mat) :: psb_s_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_s_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => s_elg_get_fmt - procedure, pass(a) :: sizeof => s_elg_sizeof - procedure, pass(a) :: vect_mv => psb_s_elg_vect_mv - procedure, pass(a) :: csmm => psb_s_elg_csmm - procedure, pass(a) :: csmv => psb_s_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_s_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_s_elg_scals - procedure, pass(a) :: scalv => psb_s_elg_scal - procedure, pass(a) :: reallocate_nz => psb_s_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_elg_allocate_mnnz - procedure, pass(a) :: reinit => s_elg_reinit + procedure, nopass :: get_fmt => s_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_s_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_cuda_elg_scals + procedure, pass(a) :: scalv => psb_s_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => s_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_elg_from_fmt - procedure, pass(a) :: free => s_elg_free - procedure, pass(a) :: mold => psb_s_elg_mold - procedure, pass(a) :: csput_a => psb_s_elg_csput_a - procedure, pass(a) :: csput_v => psb_s_elg_csput_v - procedure, pass(a) :: is_host => s_elg_is_host - procedure, pass(a) :: is_dev => s_elg_is_dev - procedure, pass(a) :: is_sync => s_elg_is_sync - procedure, pass(a) :: set_host => s_elg_set_host - procedure, pass(a) :: set_dev => s_elg_set_dev - procedure, pass(a) :: set_sync => s_elg_set_sync - procedure, pass(a) :: sync => s_elg_sync - procedure, pass(a) :: from_gpu => psb_s_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_s_elg_to_gpu - procedure, pass(a) :: asb => psb_s_elg_asb - final :: s_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => s_cuda_elg_free + procedure, pass(a) :: mold => psb_s_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_s_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_s_cuda_elg_csput_v + procedure, pass(a) :: is_host => s_cuda_elg_is_host + procedure, pass(a) :: is_dev => s_cuda_elg_is_dev + procedure, pass(a) :: is_sync => s_cuda_elg_is_sync + procedure, pass(a) :: set_host => s_cuda_elg_set_host + procedure, pass(a) :: set_dev => s_cuda_elg_set_dev + procedure, pass(a) :: set_sync => s_cuda_elg_set_sync + procedure, pass(a) :: sync => s_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_s_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_s_cuda_elg_asb + final :: s_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_s_elg_mold - procedure, pass(a) :: asb => psb_s_elg_asb + procedure, pass(a) :: mold => psb_s_cuda_elg_mold + procedure, pass(a) :: asb => psb_s_cuda_elg_asb #endif - end type psb_s_elg_sparse_mat + end type psb_s_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: s_elg_get_nzeros, s_elg_free, s_elg_get_fmt, & - & s_elg_get_size, s_elg_sizeof, s_elg_get_nz_row, s_elg_sync + private :: s_cuda_elg_get_nzeros, s_cuda_elg_free, s_cuda_elg_get_fmt, & + & s_cuda_elg_get_size, s_cuda_elg_sizeof, s_cuda_elg_get_nz_row, s_cuda_elg_sync interface - subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_vect_mv + end subroutine psb_s_cuda_elg_vect_mv end interface interface - subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_inner_vect_sv + end subroutine psb_s_cuda_elg_inner_vect_sv end interface interface - subroutine psb_s_elg_reallocate_nz(nz,a) - import :: psb_s_elg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_elg_reallocate_nz(nz,a) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_elg_reallocate_nz + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_reallocate_nz end interface interface - subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) - import :: psb_s_elg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_elg_allocate_mnnz + end subroutine psb_s_cuda_elg_allocate_mnnz end interface interface - subroutine psb_s_elg_mold(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_mold(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_mold + end subroutine psb_s_cuda_elg_mold end interface interface - subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_csput_a + end subroutine psb_s_cuda_elg_csput_a end interface interface - subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_s_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_s_base_vect_type,& + subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_s_base_vect_type,& & psb_i_base_vect_type - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_csput_v + end subroutine psb_s_cuda_elg_csput_v end interface interface - subroutine psb_s_elg_from_gpu(a,info) - import :: psb_s_elg_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_from_gpu(a,info) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_from_gpu + end subroutine psb_s_cuda_elg_from_gpu end interface interface - subroutine psb_s_elg_to_gpu(a,info, nzrm) - import :: psb_s_elg_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_elg_to_gpu + end subroutine psb_s_cuda_elg_to_gpu end interface interface - subroutine psb_s_cp_elg_from_coo(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_elg_from_coo + end subroutine psb_s_cuda_cp_elg_from_coo end interface interface - subroutine psb_s_cp_elg_from_fmt(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_elg_from_fmt + end subroutine psb_s_cuda_cp_elg_from_fmt end interface interface - subroutine psb_s_mv_elg_from_coo(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_elg_from_coo + end subroutine psb_s_cuda_mv_elg_from_coo end interface interface - subroutine psb_s_mv_elg_from_fmt(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_elg_from_fmt + end subroutine psb_s_cuda_mv_elg_from_fmt end interface interface - subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_csmv + end subroutine psb_s_cuda_elg_csmv end interface interface - subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_csmm + end subroutine psb_s_cuda_elg_csmm end interface interface - subroutine psb_s_elg_scal(d,a,info, side) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_scal(d,a,info, side) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_elg_scal + end subroutine psb_s_cuda_elg_scal end interface interface - subroutine psb_s_elg_scals(d,a,info) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_scals(d,a,info) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_scals + end subroutine psb_s_cuda_elg_scals end interface interface - subroutine psb_s_elg_asb(a) - import :: psb_s_elg_sparse_mat - class(psb_s_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_elg_asb + subroutine psb_s_cuda_elg_asb(a) + import :: psb_s_cuda_elg_sparse_mat + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function s_elg_sizeof(a) result(res) + function s_cuda_elg_sizeof(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_elg_sizeof + end function s_cuda_elg_sizeof - function s_elg_get_fmt() result(res) + function s_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function s_elg_get_fmt + end function s_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine s_elg_reinit(a,clear) + subroutine s_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine s_elg_reinit + end subroutine s_cuda_elg_reinit - subroutine s_elg_free(a) + subroutine s_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine s_elg_free + end subroutine s_cuda_elg_free - subroutine s_elg_sync(a) + subroutine s_cuda_elg_sync(a) implicit none - class(psb_s_elg_sparse_mat), target, intent(in) :: a - class(psb_s_elg_sparse_mat), pointer :: tmpa + class(psb_s_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine s_elg_sync + end subroutine s_cuda_elg_sync - subroutine s_elg_set_host(a) + subroutine s_cuda_elg_set_host(a) implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine s_elg_set_host + end subroutine s_cuda_elg_set_host - subroutine s_elg_set_dev(a) + subroutine s_cuda_elg_set_dev(a) implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine s_elg_set_dev + end subroutine s_cuda_elg_set_dev - subroutine s_elg_set_sync(a) + subroutine s_cuda_elg_set_sync(a) implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine s_elg_set_sync + end subroutine s_cuda_elg_set_sync - function s_elg_is_dev(a) result(res) + function s_cuda_elg_is_dev(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function s_elg_is_dev + end function s_cuda_elg_is_dev - function s_elg_is_host(a) result(res) + function s_cuda_elg_is_host(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function s_elg_is_host + end function s_cuda_elg_is_host - function s_elg_is_sync(a) result(res) + function s_cuda_elg_is_sync(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function s_elg_is_sync + end function s_cuda_elg_is_sync - subroutine s_elg_finalize(a) + subroutine s_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_s_elg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_elg_finalize + end subroutine s_cuda_elg_finalize #else interface - subroutine psb_s_elg_asb(a) - import :: psb_s_elg_sparse_mat - class(psb_s_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_elg_asb + subroutine psb_s_cuda_elg_asb(a) + import :: psb_s_cuda_elg_sparse_mat + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_asb end interface interface - subroutine psb_s_elg_mold(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_mold(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_mold + end subroutine psb_s_cuda_elg_mold end interface #endif -end module psb_s_elg_mat_mod +end module psb_s_cuda_elg_mat_mod diff --git a/cuda/psb_s_hdiag_mat_mod.F90 b/cuda/psb_s_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_s_hdiag_mat_mod.F90 rename to cuda/psb_s_cuda_hdiag_mat_mod.F90 index be0ef2b2..0a66ff09 100644 --- a/cuda/psb_s_hdiag_mat_mod.F90 +++ b/cuda/psb_s_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_s_hdiag_mat_mod +module psb_s_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_s_hdia_mat_mod - type, extends(psb_s_hdia_sparse_mat) :: psb_s_hdiag_sparse_mat + type, extends(psb_s_hdia_sparse_mat) :: psb_s_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => s_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => s_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_s_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_s_hdiag_csmm - procedure, pass(a) :: csmv => psb_s_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_s_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_s_hdiag_scals - ! procedure, pass(a) :: scalv => psb_s_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_s_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_s_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => s_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_s_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_s_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_s_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_s_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_s_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_s_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_hdiag_from_fmt - procedure, pass(a) :: free => s_hdiag_free - procedure, pass(a) :: mold => psb_s_hdiag_mold - procedure, pass(a) :: to_gpu => psb_s_hdiag_to_gpu - final :: s_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => s_cuda_hdiag_free + procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_hdiag_to_gpu + final :: s_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_s_hdiag_mold + procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold #endif - end type psb_s_hdiag_sparse_mat + end type psb_s_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: s_hdiag_get_nzeros, s_hdiag_free, s_hdiag_get_fmt, & - & s_hdiag_get_size, s_hdiag_sizeof, s_hdiag_get_nz_row + private :: s_cuda_hdiag_get_nzeros, s_cuda_hdiag_free, s_cuda_hdiag_get_fmt, & + & s_cuda_hdiag_get_size, s_cuda_hdiag_sizeof, s_cuda_hdiag_get_nz_row interface - subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hdiag_vect_mv + end subroutine psb_s_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_s_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type -!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_s_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_hdiag_inner_vect_sv +!!$ end subroutine psb_s_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_s_hdiag_reallocate_nz +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_s_hdiag_allocate_mnnz +!!$ end subroutine psb_s_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_s_hdiag_mold(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_mold(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hdiag_mold + end subroutine psb_s_cuda_hdiag_mold end interface interface - subroutine psb_s_hdiag_to_gpu(a,info) - import :: psb_s_hdiag_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hdiag_to_gpu(a,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hdiag_to_gpu + end subroutine psb_s_cuda_hdiag_to_gpu end interface interface - subroutine psb_s_cp_hdiag_from_coo(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hdiag_from_coo + end subroutine psb_s_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_s_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_s_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_cp_hdiag_from_fmt +!!$ end subroutine psb_s_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_s_mv_hdiag_from_coo(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hdiag_from_coo + end subroutine psb_s_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_s_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_s_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_mv_hdiag_from_fmt +!!$ end subroutine psb_s_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hdiag_csmv + end subroutine psb_s_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_s_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_hdiag_csmm +!!$ end subroutine psb_s_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_scal(d,a,info, side) -!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_s_hdiag_scal +!!$ end subroutine psb_s_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_scals(d,a,info) -!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_hdiag_scals +!!$ end subroutine psb_s_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function s_hdiag_get_fmt() result(res) + function s_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function s_hdiag_get_fmt + end function s_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine s_hdiag_free(a) + subroutine s_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine s_hdiag_free + end subroutine s_cuda_hdiag_free - subroutine s_hdiag_finalize(a) + subroutine s_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_s_hdiag_sparse_mat), intent(inout) :: a + type(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_s_hdia_sparse_mat%free() return - end subroutine s_hdiag_finalize + end subroutine s_cuda_hdiag_finalize #else interface - subroutine psb_s_hdiag_mold(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_mold(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hdiag_mold + end subroutine psb_s_cuda_hdiag_mold end interface #endif -end module psb_s_hdiag_mat_mod +end module psb_s_cuda_hdiag_mat_mod diff --git a/cuda/psb_s_hlg_mat_mod.F90 b/cuda/psb_s_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_s_hlg_mat_mod.F90 rename to cuda/psb_s_cuda_hlg_mat_mod.F90 index 8f896e4b..81b94e5d 100644 --- a/cuda/psb_s_hlg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_s_hlg_mat_mod +module psb_s_cuda_hlg_mat_mod use iso_c_binding use psb_s_mat_mod @@ -41,7 +41,7 @@ module psb_s_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_hll_sparse_mat) :: psb_s_hlg_sparse_mat + type, extends(psb_s_hll_sparse_mat) :: psb_s_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_s_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => s_hlg_get_fmt - procedure, pass(a) :: sizeof => s_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_s_hlg_vect_mv - procedure, pass(a) :: csmm => psb_s_hlg_csmm - procedure, pass(a) :: csmv => psb_s_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_s_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_s_hlg_scals - procedure, pass(a) :: scalv => psb_s_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_s_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_hlg_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_s_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_s_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_hlg_from_fmt - procedure, pass(a) :: free => s_hlg_free - procedure, pass(a) :: mold => psb_s_hlg_mold - procedure, pass(a) :: is_host => s_hlg_is_host - procedure, pass(a) :: is_dev => s_hlg_is_dev - procedure, pass(a) :: is_sync => s_hlg_is_sync - procedure, pass(a) :: set_host => s_hlg_set_host - procedure, pass(a) :: set_dev => s_hlg_set_dev - procedure, pass(a) :: set_sync => s_hlg_set_sync - procedure, pass(a) :: sync => s_hlg_sync - procedure, pass(a) :: from_gpu => psb_s_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_s_hlg_to_gpu - final :: s_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => s_cuda_hlg_free + procedure, pass(a) :: mold => psb_s_cuda_hlg_mold + procedure, pass(a) :: is_host => s_cuda_hlg_is_host + procedure, pass(a) :: is_dev => s_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => s_cuda_hlg_is_sync + procedure, pass(a) :: set_host => s_cuda_hlg_set_host + procedure, pass(a) :: set_dev => s_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => s_cuda_hlg_set_sync + procedure, pass(a) :: sync => s_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_s_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_cuda_hlg_to_gpu + final :: s_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_s_hlg_mold + procedure, pass(a) :: mold => psb_s_cuda_hlg_mold #endif - end type psb_s_hlg_sparse_mat + end type psb_s_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: s_hlg_get_nzeros, s_hlg_free, s_hlg_get_fmt, & - & s_hlg_get_size, s_hlg_sizeof, s_hlg_get_nz_row + private :: s_cuda_hlg_get_nzeros, s_cuda_hlg_free, s_cuda_hlg_get_fmt, & + & s_cuda_hlg_get_size, s_cuda_hlg_sizeof, s_cuda_hlg_get_nz_row interface - subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_vect_mv + end subroutine psb_s_cuda_hlg_vect_mv end interface interface - subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_inner_vect_sv + end subroutine psb_s_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_s_hlg_reallocate_nz(nz,a) - import :: psb_s_hlg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_s_hlg_reallocate_nz + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_hlg_reallocate_nz end interface interface - subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_s_hlg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_hlg_allocate_mnnz + end subroutine psb_s_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_s_hlg_mold(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_mold(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_mold + end subroutine psb_s_cuda_hlg_mold end interface interface - subroutine psb_s_hlg_from_gpu(a,info) - import :: psb_s_hlg_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_from_gpu(a,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_from_gpu + end subroutine psb_s_cuda_hlg_from_gpu end interface interface - subroutine psb_s_hlg_to_gpu(a,info, nzrm) - import :: psb_s_hlg_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_hlg_to_gpu + end subroutine psb_s_cuda_hlg_to_gpu end interface interface - subroutine psb_s_cp_hlg_from_coo(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hlg_from_coo + end subroutine psb_s_cuda_cp_hlg_from_coo end interface interface - subroutine psb_s_cp_hlg_from_fmt(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hlg_from_fmt + end subroutine psb_s_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_s_mv_hlg_from_coo(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hlg_from_coo + end subroutine psb_s_cuda_mv_hlg_from_coo end interface interface - subroutine psb_s_mv_hlg_from_fmt(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hlg_from_fmt + end subroutine psb_s_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_csmv + end subroutine psb_s_cuda_hlg_csmv end interface interface - subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_csmm + end subroutine psb_s_cuda_hlg_csmm end interface interface - subroutine psb_s_hlg_scal(d,a,info, side) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_scal(d,a,info, side) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_hlg_scal + end subroutine psb_s_cuda_hlg_scal end interface interface - subroutine psb_s_hlg_scals(d,a,info) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_scals(d,a,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_scals + end subroutine psb_s_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function s_hlg_sizeof(a) result(res) + function s_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_hlg_sizeof + end function s_cuda_hlg_sizeof - function s_hlg_get_fmt() result(res) + function s_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function s_hlg_get_fmt + end function s_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine s_hlg_free(a) + subroutine s_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine s_hlg_free + end subroutine s_cuda_hlg_free - subroutine s_hlg_sync(a) + subroutine s_cuda_hlg_sync(a) implicit none - class(psb_s_hlg_sparse_mat), target, intent(in) :: a - class(psb_s_hlg_sparse_mat), pointer :: tmpa + class(psb_s_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine s_hlg_sync + end subroutine s_cuda_hlg_sync - subroutine s_hlg_set_host(a) + subroutine s_cuda_hlg_set_host(a) implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine s_hlg_set_host + end subroutine s_cuda_hlg_set_host - subroutine s_hlg_set_dev(a) + subroutine s_cuda_hlg_set_dev(a) implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine s_hlg_set_dev + end subroutine s_cuda_hlg_set_dev - subroutine s_hlg_set_sync(a) + subroutine s_cuda_hlg_set_sync(a) implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine s_hlg_set_sync + end subroutine s_cuda_hlg_set_sync - function s_hlg_is_dev(a) result(res) + function s_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function s_hlg_is_dev + end function s_cuda_hlg_is_dev - function s_hlg_is_host(a) result(res) + function s_cuda_hlg_is_host(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function s_hlg_is_host + end function s_cuda_hlg_is_host - function s_hlg_is_sync(a) result(res) + function s_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function s_hlg_is_sync + end function s_cuda_hlg_is_sync - subroutine s_hlg_finalize(a) + subroutine s_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_s_hlg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_hlg_finalize + end subroutine s_cuda_hlg_finalize #else interface - subroutine psb_s_hlg_mold(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_mold(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_mold + end subroutine psb_s_cuda_hlg_mold end interface #endif -end module psb_s_hlg_mat_mod +end module psb_s_cuda_hlg_mat_mod diff --git a/cuda/psb_s_hybg_mat_mod.F90 b/cuda/psb_s_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_s_hybg_mat_mod.F90 rename to cuda/psb_s_cuda_hybg_mat_mod.F90 index 5a8e0e5d..ae76aac1 100644 --- a/cuda/psb_s_hybg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_s_hybg_mat_mod +module psb_s_cuda_hybg_mat_mod use iso_c_binding use psb_s_mat_mod use cusparse_mod - type, extends(psb_s_csr_sparse_mat) :: psb_s_hybg_sparse_mat + type, extends(psb_s_csr_sparse_mat) :: psb_s_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_s_hybg_mat_mod type(s_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => s_hybg_get_fmt - procedure, pass(a) :: sizeof => s_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_s_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_s_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_hybg_csmm - procedure, pass(a) :: csmv => psb_s_hybg_csmv - procedure, pass(a) :: scals => psb_s_hybg_scals - procedure, pass(a) :: scalv => psb_s_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_s_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_hybg_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_s_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_s_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_hybg_from_fmt - procedure, pass(a) :: free => s_hybg_free - procedure, pass(a) :: mold => psb_s_hybg_mold - procedure, pass(a) :: to_gpu => psb_s_hybg_to_gpu - final :: s_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => s_cuda_hybg_free + procedure, pass(a) :: mold => psb_s_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_hybg_to_gpu + final :: s_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_s_hybg_mold + procedure, pass(a) :: mold => psb_s_cuda_hybg_mold #endif - end type psb_s_hybg_sparse_mat + end type psb_s_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: s_hybg_get_nzeros, s_hybg_free, s_hybg_get_fmt, & - & s_hybg_get_size, s_hybg_sizeof, s_hybg_get_nz_row + private :: s_cuda_hybg_get_nzeros, s_cuda_hybg_free, s_cuda_hybg_get_fmt, & + & s_cuda_hybg_get_size, s_cuda_hybg_sizeof, s_cuda_hybg_get_nz_row interface - subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_inner_vect_sv + end subroutine psb_s_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_vect_mv + end subroutine psb_s_cuda_hybg_vect_mv end interface interface - subroutine psb_s_hybg_reallocate_nz(nz,a) - import :: psb_s_hybg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_s_hybg_reallocate_nz + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_hybg_reallocate_nz end interface interface - subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_s_hybg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_hybg_allocate_mnnz + end subroutine psb_s_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_s_hybg_mold(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_mold(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hybg_mold + end subroutine psb_s_cuda_hybg_mold end interface interface - subroutine psb_s_hybg_to_gpu(a,info, nzrm) - import :: psb_s_hybg_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_hybg_to_gpu + end subroutine psb_s_cuda_hybg_to_gpu end interface interface - subroutine psb_s_cp_hybg_from_coo(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hybg_from_coo + end subroutine psb_s_cuda_cp_hybg_from_coo end interface interface - subroutine psb_s_cp_hybg_from_fmt(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hybg_from_fmt + end subroutine psb_s_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_s_mv_hybg_from_coo(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hybg_from_coo + end subroutine psb_s_cuda_mv_hybg_from_coo end interface interface - subroutine psb_s_mv_hybg_from_fmt(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hybg_from_fmt + end subroutine psb_s_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_csmv + end subroutine psb_s_cuda_hybg_csmv end interface interface - subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_csmm + end subroutine psb_s_cuda_hybg_csmm end interface interface - subroutine psb_s_hybg_scal(d,a,info,side) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hybg_scal(d,a,info,side) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_hybg_scal + end subroutine psb_s_cuda_hybg_scal end interface interface - subroutine psb_s_hybg_scals(d,a,info) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hybg_scals(d,a,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hybg_scals + end subroutine psb_s_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function s_hybg_sizeof(a) result(res) + function s_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + psb_sizeof_sp * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_hybg_sizeof + end function s_cuda_hybg_sizeof - function s_hybg_get_fmt() result(res) + function s_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function s_hybg_get_fmt + end function s_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine s_hybg_free(a) + subroutine s_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_s_csr_sparse_mat%free() return - end subroutine s_hybg_free + end subroutine s_cuda_hybg_free - subroutine s_hybg_finalize(a) + subroutine s_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_s_hybg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine s_hybg_finalize + end subroutine s_cuda_hybg_finalize #else interface - subroutine psb_s_hybg_mold(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_mold(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hybg_mold + end subroutine psb_s_cuda_hybg_mold end interface #endif -end module psb_s_hybg_mat_mod +end module psb_s_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_s_gpu_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_s_gpu_vect_mod.F90 rename to cuda/psb_s_cuda_vect_mod.F90 index 1371db53..e19c980a 100644 --- a/cuda/psb_s_gpu_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_s_gpu_vect_mod +module psb_s_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_s_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_s_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_s_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_base_vect_type) :: psb_s_vect_gpu + type, extends(psb_s_base_vect_type) :: psb_s_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_s_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => s_gpu_get_nrows - procedure, nopass :: get_fmt => s_gpu_get_fmt - - procedure, pass(x) :: all => s_gpu_all - procedure, pass(x) :: zero => s_gpu_zero - procedure, pass(x) :: asb_m => s_gpu_asb_m - procedure, pass(x) :: sync => s_gpu_sync - procedure, pass(x) :: sync_space => s_gpu_sync_space - procedure, pass(x) :: bld_x => s_gpu_bld_x - procedure, pass(x) :: bld_mn => s_gpu_bld_mn - procedure, pass(x) :: free => s_gpu_free - procedure, pass(x) :: ins_a => s_gpu_ins_a - procedure, pass(x) :: ins_v => s_gpu_ins_v - procedure, pass(x) :: is_host => s_gpu_is_host - procedure, pass(x) :: is_dev => s_gpu_is_dev - procedure, pass(x) :: is_sync => s_gpu_is_sync - procedure, pass(x) :: set_host => s_gpu_set_host - procedure, pass(x) :: set_dev => s_gpu_set_dev - procedure, pass(x) :: set_sync => s_gpu_set_sync - procedure, pass(x) :: set_scal => s_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => s_gpu_set_vect - procedure, pass(x) :: gthzv_x => s_gpu_gthzv_x - procedure, pass(y) :: sctb => s_gpu_sctb - procedure, pass(y) :: sctb_x => s_gpu_sctb_x - procedure, pass(x) :: gthzbuf => s_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => s_gpu_sctb_buf - procedure, pass(x) :: new_buffer => s_gpu_new_buffer - procedure, nopass :: device_wait => s_gpu_device_wait - procedure, pass(x) :: free_buffer => s_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => s_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => s_gpu_dot_v - procedure, pass(x) :: dot_a => s_gpu_dot_a - procedure, pass(y) :: axpby_v => s_gpu_axpby_v - procedure, pass(y) :: axpby_a => s_gpu_axpby_a - procedure, pass(y) :: mlt_v => s_gpu_mlt_v - procedure, pass(y) :: mlt_a => s_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => s_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => s_gpu_mlt_v_2 - procedure, pass(x) :: scal => s_gpu_scal - procedure, pass(x) :: nrm2 => s_gpu_nrm2 - procedure, pass(x) :: amax => s_gpu_amax - procedure, pass(x) :: asum => s_gpu_asum - procedure, pass(x) :: absval1 => s_gpu_absval1 - procedure, pass(x) :: absval2 => s_gpu_absval2 - - final :: s_gpu_vect_finalize + procedure, pass(x) :: get_nrows => s_cuda_get_nrows + procedure, nopass :: get_fmt => s_cuda_get_fmt + + procedure, pass(x) :: all => s_cuda_all + procedure, pass(x) :: zero => s_cuda_zero + procedure, pass(x) :: asb_m => s_cuda_asb_m + procedure, pass(x) :: sync => s_cuda_sync + procedure, pass(x) :: sync_space => s_cuda_sync_space + procedure, pass(x) :: bld_x => s_cuda_bld_x + procedure, pass(x) :: bld_mn => s_cuda_bld_mn + procedure, pass(x) :: free => s_cuda_free + procedure, pass(x) :: ins_a => s_cuda_ins_a + procedure, pass(x) :: ins_v => s_cuda_ins_v + procedure, pass(x) :: is_host => s_cuda_is_host + procedure, pass(x) :: is_dev => s_cuda_is_dev + procedure, pass(x) :: is_sync => s_cuda_is_sync + procedure, pass(x) :: set_host => s_cuda_set_host + procedure, pass(x) :: set_dev => s_cuda_set_dev + procedure, pass(x) :: set_sync => s_cuda_set_sync + procedure, pass(x) :: set_scal => s_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => s_cuda_set_vect + procedure, pass(x) :: gthzv_x => s_cuda_gthzv_x + procedure, pass(y) :: sctb => s_cuda_sctb + procedure, pass(y) :: sctb_x => s_cuda_sctb_x + procedure, pass(x) :: gthzbuf => s_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => s_cuda_sctb_buf + procedure, pass(x) :: new_buffer => s_cuda_new_buffer + procedure, nopass :: device_wait => s_cuda_device_wait + procedure, pass(x) :: free_buffer => s_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => s_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => s_cuda_dot_v + procedure, pass(x) :: dot_a => s_cuda_dot_a + procedure, pass(y) :: axpby_v => s_cuda_axpby_v + procedure, pass(y) :: axpby_a => s_cuda_axpby_a + procedure, pass(y) :: mlt_v => s_cuda_mlt_v + procedure, pass(y) :: mlt_a => s_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => s_cuda_mlt_v_2 + procedure, pass(x) :: scal => s_cuda_scal + procedure, pass(x) :: nrm2 => s_cuda_nrm2 + procedure, pass(x) :: amax => s_cuda_amax + procedure, pass(x) :: asum => s_cuda_asum + procedure, pass(x) :: absval1 => s_cuda_absval1 + procedure, pass(x) :: absval2 => s_cuda_absval2 + + final :: s_cuda_vect_finalize #endif - end type psb_s_vect_gpu + end type psb_s_vect_cuda - public :: psb_s_vect_gpu_ + public :: psb_s_vect_cuda_ private :: constructor - interface psb_s_vect_gpu_ + interface psb_s_vect_cuda_ module procedure constructor - end interface psb_s_vect_gpu_ + end interface psb_s_vect_cuda_ contains function constructor(x) result(this) real(psb_spk_) :: x(:) - type(psb_s_vect_gpu) :: this + type(psb_s_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine s_gpu_device_wait() + subroutine s_cuda_device_wait() call psb_cudaSync() - end subroutine s_gpu_device_wait + end subroutine s_cuda_device_wait - subroutine s_gpu_new_buffer(n,x,info) + subroutine s_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine s_gpu_gthzv_x + end subroutine s_cuda_gthzv_x - subroutine s_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine s_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_s_vect_gpu) :: x + class(psb_s_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine s_gpu_gthzbuf + end subroutine s_cuda_gthzbuf - subroutine s_gpu_sctb(n,idx,x,beta,y) + subroutine s_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: beta, x(:) - class(psb_s_vect_gpu) :: y + class(psb_s_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_s_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine s_gpu_sctb + end subroutine s_cuda_sctb - subroutine s_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine s_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta, x(:) - class(psb_s_vect_gpu) :: y + class(psb_s_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine s_gpu_sctb_x + end subroutine s_cuda_sctb_x - subroutine s_gpu_sctb_buf(i,n,idx,beta,y) + subroutine s_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta - class(psb_s_vect_gpu) :: y + class(psb_s_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine s_gpu_sctb_buf + end subroutine s_cuda_sctb_buf - subroutine s_gpu_bld_x(x,this) + subroutine s_cuda_bld_x(x,this) use psb_base_mod real(psb_spk_), intent(in) :: this(:) - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_bld_x',& + call psb_errpush(info,'s_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine s_gpu_bld_x + end subroutine s_cuda_bld_x - subroutine s_gpu_bld_mn(x,n) + subroutine s_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'s_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'s_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine s_gpu_bld_mn + end subroutine s_cuda_bld_mn - subroutine s_gpu_set_host(x) + subroutine s_cuda_set_host(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine s_gpu_set_host + end subroutine s_cuda_set_host - subroutine s_gpu_set_dev(x) + subroutine s_cuda_set_dev(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine s_gpu_set_dev + end subroutine s_cuda_set_dev - subroutine s_gpu_set_sync(x) + subroutine s_cuda_set_sync(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine s_gpu_set_sync + end subroutine s_cuda_set_sync - function s_gpu_is_dev(x) result(res) + function s_cuda_is_dev(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function s_gpu_is_dev + end function s_cuda_is_dev - function s_gpu_is_host(x) result(res) + function s_cuda_is_host(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function s_gpu_is_host + end function s_cuda_is_host - function s_gpu_is_sync(x) result(res) + function s_cuda_is_sync(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function s_gpu_is_sync + end function s_cuda_is_sync - function s_gpu_get_nrows(x) result(res) + function s_cuda_get_nrows(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function s_gpu_get_nrows + end function s_cuda_get_nrows - function s_gpu_get_fmt() result(res) + function s_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'sGPU' - end function s_gpu_get_fmt + end function s_cuda_get_fmt - subroutine s_gpu_all(n, x, info) + subroutine s_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_s_vect_gpu), intent(out) :: x + class(psb_s_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_all',& + call psb_errpush(info,'s_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine s_gpu_all + end subroutine s_cuda_all - subroutine s_gpu_zero(x) + subroutine s_cuda_zero(x) use psi_serial_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=szero call x%set_host() - end subroutine s_gpu_zero + end subroutine s_cuda_zero - subroutine s_gpu_asb_m(n, x, info) + subroutine s_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine s_gpu_asb_m + end subroutine s_cuda_asb_m - subroutine s_gpu_sync_space(x,info) + subroutine s_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine s_gpu_sync_space + end subroutine s_cuda_sync_space - subroutine s_gpu_sync(x) + subroutine s_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'s_gpu_sync') + call psb_errpush(info,'s_cuda_sync') end if - end subroutine s_gpu_sync + end subroutine s_cuda_sync - subroutine s_gpu_free(x, info) + subroutine s_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine s_gpu_free + end subroutine s_cuda_free - subroutine s_gpu_set_scal(x,val,first,last) - class(psb_s_vect_gpu), intent(inout) :: x + subroutine s_cuda_set_scal(x,val,first,last) + class(psb_s_vect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine s_gpu_set_scal + end subroutine s_cuda_set_scal !!$ -!!$ subroutine s_gpu_set_vect(x,val) -!!$ class(psb_s_vect_gpu), intent(inout) :: x +!!$ subroutine s_cuda_set_vect(x,val) +!!$ class(psb_s_vect_cuda), intent(inout) :: x !!$ real(psb_spk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_s_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine s_gpu_set_vect +!!$ end subroutine s_cuda_set_vect - function s_gpu_dot_v(n,x,y) result(res) + function s_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_s_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'s_gpu_dot_v') + call psb_errpush(info,'s_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function s_gpu_dot_v + end function s_cuda_dot_v - function s_gpu_dot_a(n,x,y) result(res) + function s_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function s_gpu_dot_a + end function s_cuda_dot_a - subroutine s_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine s_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y real(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) ! Do something different here if ((beta /= szero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine s_gpu_axpby_v + end subroutine s_cuda_axpby_v - subroutine s_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine s_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m real(psb_spk_), intent(in) :: x(:) - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y real(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine s_gpu_axpby_a + end subroutine s_cuda_axpby_a - subroutine s_gpu_mlt_v(x, y, info) + subroutine s_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine s_gpu_mlt_v + end subroutine s_cuda_mlt_v - subroutine s_gpu_mlt_a(x, y, info) + subroutine s_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none real(psb_spk_), intent(in) :: x(:) - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_s_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine s_gpu_mlt_a + end subroutine s_cuda_mlt_a - subroutine s_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine s_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none real(psb_spk_), intent(in) :: alpha,beta real(psb_spk_), intent(in) :: x(:) real(psb_spk_), intent(in) :: y(:) - class(psb_s_vect_gpu), intent(inout) :: z + class(psb_s_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine s_gpu_mlt_a_2 + end subroutine s_cuda_mlt_a_2 - subroutine s_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine s_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none real(psb_spk_), intent(in) :: alpha,beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y - class(psb_s_vect_gpu), intent(inout) :: z + class(psb_s_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type (yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= szero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine s_gpu_mlt_v_2 + end subroutine s_cuda_mlt_v_2 - subroutine s_gpu_scal(alpha, x) + subroutine s_cuda_scal(alpha, x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x real(psb_spk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine s_gpu_scal + end subroutine s_cuda_scal - function s_gpu_nrm2(n,x) result(res) + function s_cuda_nrm2(n,x) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDevice(res,n,x%deviceVect) - end function s_gpu_nrm2 + end function s_cuda_nrm2 - function s_gpu_amax(n,x) result(res) + function s_cuda_amax(n,x) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDevice(res,n,x%deviceVect) - end function s_gpu_amax + end function s_cuda_amax - function s_gpu_asum(n,x) result(res) + function s_cuda_asum(n,x) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDevice(res,n,x%deviceVect) - end function s_gpu_asum + end function s_cuda_asum - subroutine s_gpu_absval1(x) + subroutine s_cuda_absval1(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,sone,x%deviceVect) - end subroutine s_gpu_absval1 + end subroutine s_cuda_absval1 - subroutine s_gpu_absval2(x,y) + subroutine s_cuda_absval2(x,y) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_s_vect_gpu) + class is (psb_s_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,sone,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_s_base_vect_type%absval(y) end select - end subroutine s_gpu_absval2 + end subroutine s_cuda_absval2 - subroutine s_gpu_vect_finalize(x) + subroutine s_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_s_vect_gpu), intent(inout) :: x + type(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine s_gpu_vect_finalize + end subroutine s_cuda_vect_finalize - subroutine s_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine s_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_s_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_s_vect_gpu) + class is (psb_s_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceFloat(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine s_gpu_ins_v + end subroutine s_cuda_ins_v - subroutine s_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine s_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_spk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_s_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine s_gpu_ins_a + end subroutine s_cuda_ins_a #endif -end module psb_s_gpu_vect_mod +end module psb_s_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_s_gpu_vect_mod -module psb_s_gpu_multivect_mod +module psb_s_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_s_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_s_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_s_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_base_multivect_type) :: psb_s_multivect_gpu + type, extends(psb_s_base_multivect_type) :: psb_s_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_s_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => s_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => s_gpu_multi_get_ncols - procedure, nopass :: get_fmt => s_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => s_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => s_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => s_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => s_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => s_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => s_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => s_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => s_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => s_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => s_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => s_gpu_multi_amax -!!$ procedure, pass(x) :: asum => s_gpu_multi_asum - procedure, pass(x) :: all => s_gpu_multi_all - procedure, pass(x) :: zero => s_gpu_multi_zero - procedure, pass(x) :: asb => s_gpu_multi_asb - procedure, pass(x) :: sync => s_gpu_multi_sync - procedure, pass(x) :: sync_space => s_gpu_multi_sync_space - procedure, pass(x) :: bld_x => s_gpu_multi_bld_x - procedure, pass(x) :: bld_n => s_gpu_multi_bld_n - procedure, pass(x) :: free => s_gpu_multi_free - procedure, pass(x) :: ins => s_gpu_multi_ins - procedure, pass(x) :: is_host => s_gpu_multi_is_host - procedure, pass(x) :: is_dev => s_gpu_multi_is_dev - procedure, pass(x) :: is_sync => s_gpu_multi_is_sync - procedure, pass(x) :: set_host => s_gpu_multi_set_host - procedure, pass(x) :: set_dev => s_gpu_multi_set_dev - procedure, pass(x) :: set_sync => s_gpu_multi_set_sync - procedure, pass(x) :: set_scal => s_gpu_multi_set_scal - procedure, pass(x) :: set_vect => s_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => s_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => s_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => s_gpu_multi_sctb_x - final :: s_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => s_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => s_cuda_multi_get_ncols + procedure, nopass :: get_fmt => s_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => s_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => s_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => s_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => s_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => s_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => s_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => s_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => s_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => s_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => s_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => s_cuda_multi_amax +!!$ procedure, pass(x) :: asum => s_cuda_multi_asum + procedure, pass(x) :: all => s_cuda_multi_all + procedure, pass(x) :: zero => s_cuda_multi_zero + procedure, pass(x) :: asb => s_cuda_multi_asb + procedure, pass(x) :: sync => s_cuda_multi_sync + procedure, pass(x) :: sync_space => s_cuda_multi_sync_space + procedure, pass(x) :: bld_x => s_cuda_multi_bld_x + procedure, pass(x) :: bld_n => s_cuda_multi_bld_n + procedure, pass(x) :: free => s_cuda_multi_free + procedure, pass(x) :: ins => s_cuda_multi_ins + procedure, pass(x) :: is_host => s_cuda_multi_is_host + procedure, pass(x) :: is_dev => s_cuda_multi_is_dev + procedure, pass(x) :: is_sync => s_cuda_multi_is_sync + procedure, pass(x) :: set_host => s_cuda_multi_set_host + procedure, pass(x) :: set_dev => s_cuda_multi_set_dev + procedure, pass(x) :: set_sync => s_cuda_multi_set_sync + procedure, pass(x) :: set_scal => s_cuda_multi_set_scal + procedure, pass(x) :: set_vect => s_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => s_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => s_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => s_cuda_multi_sctb_x + final :: s_cuda_multi_vect_finalize #endif - end type psb_s_multivect_gpu + end type psb_s_multivect_cuda - public :: psb_s_multivect_gpu + public :: psb_s_multivect_cuda private :: constructor - interface psb_s_multivect_gpu + interface psb_s_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) real(psb_spk_) :: x(:,:) - type(psb_s_multivect_gpu) :: this + type(psb_s_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine s_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_spk_) :: y(:) -!!$ class(psb_s_multivect_gpu) :: x +!!$ class(psb_s_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine s_gpu_multi_gthzv_x +!!$ end subroutine s_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine s_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine s_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ real(psb_spk_) :: beta, x(:) -!!$ class(psb_s_multivect_gpu) :: y +!!$ class(psb_s_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_s_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine s_gpu_multi_sctb +!!$ end subroutine s_cuda_multi_sctb !!$ -!!$ subroutine s_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine s_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_spk_) :: beta, x(:) -!!$ class(psb_s_multivect_gpu) :: y +!!$ class(psb_s_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine s_gpu_multi_sctb_x +!!$ end subroutine s_cuda_multi_sctb_x - subroutine s_gpu_multi_bld_x(x,this) + subroutine s_cuda_multi_bld_x(x,this) use psb_base_mod real(psb_spk_), intent(in) :: this(:,:) - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_multi_bld_x',& + call psb_errpush(info,'s_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine s_gpu_multi_bld_x + end subroutine s_cuda_multi_bld_x - subroutine s_gpu_multi_bld_n(x,m,n) + subroutine s_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'s_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'s_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine s_gpu_multi_bld_n + end subroutine s_cuda_multi_bld_n - subroutine s_gpu_multi_set_host(x) + subroutine s_cuda_multi_set_host(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine s_gpu_multi_set_host + end subroutine s_cuda_multi_set_host - subroutine s_gpu_multi_set_dev(x) + subroutine s_cuda_multi_set_dev(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine s_gpu_multi_set_dev + end subroutine s_cuda_multi_set_dev - subroutine s_gpu_multi_set_sync(x) + subroutine s_cuda_multi_set_sync(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine s_gpu_multi_set_sync + end subroutine s_cuda_multi_set_sync - function s_gpu_multi_is_dev(x) result(res) + function s_cuda_multi_is_dev(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function s_gpu_multi_is_dev + end function s_cuda_multi_is_dev - function s_gpu_multi_is_host(x) result(res) + function s_cuda_multi_is_host(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function s_gpu_multi_is_host + end function s_cuda_multi_is_host - function s_gpu_multi_is_sync(x) result(res) + function s_cuda_multi_is_sync(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function s_gpu_multi_is_sync + end function s_cuda_multi_is_sync - function s_gpu_multi_get_nrows(x) result(res) + function s_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function s_gpu_multi_get_nrows + end function s_cuda_multi_get_nrows - function s_gpu_multi_get_ncols(x) result(res) + function s_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function s_gpu_multi_get_ncols + end function s_cuda_multi_get_ncols - function s_gpu_multi_get_fmt() result(res) + function s_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'sGPU' - end function s_gpu_multi_get_fmt + end function s_cuda_multi_get_fmt -!!$ function s_gpu_multi_dot_v(n,x,y) result(res) +!!$ function s_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ class(psb_s_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_s_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'s_gpu_multi_dot_v') +!!$ call psb_errpush(info,'s_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function s_gpu_multi_dot_v +!!$ end function s_cuda_multi_dot_v !!$ -!!$ function s_gpu_multi_dot_a(n,x,y) result(res) +!!$ function s_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ real(psb_spk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function s_gpu_multi_dot_a +!!$ end function s_cuda_multi_dot_a !!$ -!!$ subroutine s_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine s_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_s_base_multivect_type), intent(inout) :: x -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ real(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine s_gpu_multi_axpby_v +!!$ end subroutine s_cuda_multi_axpby_v !!$ -!!$ subroutine s_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine s_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ real(psb_spk_), intent(in) :: x(:) -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ real(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine s_gpu_multi_axpby_a +!!$ end subroutine s_cuda_multi_axpby_a !!$ -!!$ subroutine s_gpu_multi_mlt_v(x, y, info) +!!$ subroutine s_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_s_base_multivect_type), intent(inout) :: x -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine s_gpu_multi_mlt_v +!!$ end subroutine s_cuda_multi_mlt_v !!$ -!!$ subroutine s_gpu_multi_mlt_a(x, y, info) +!!$ subroutine s_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: x(:) -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_s_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine s_gpu_multi_mlt_a +!!$ end subroutine s_cuda_multi_mlt_a !!$ -!!$ subroutine s_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine s_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta !!$ real(psb_spk_), intent(in) :: x(:) !!$ real(psb_spk_), intent(in) :: y(:) -!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ class(psb_s_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine s_gpu_multi_mlt_a_2 +!!$ end subroutine s_cuda_multi_mlt_a_2 !!$ -!!$ subroutine s_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine s_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta !!$ class(psb_s_base_multivect_type), intent(inout) :: x !!$ class(psb_s_base_multivect_type), intent(inout) :: y -!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ class(psb_s_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine s_gpu_multi_mlt_v_2 +!!$ end subroutine s_cuda_multi_mlt_v_2 - subroutine s_gpu_multi_set_scal(x,val) - class(psb_s_multivect_gpu), intent(inout) :: x + subroutine s_cuda_multi_set_scal(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_s_base_multivect_type%set_scal(val) call x%set_host() - end subroutine s_gpu_multi_set_scal + end subroutine s_cuda_multi_set_scal - subroutine s_gpu_multi_set_vect(x,val) - class(psb_s_multivect_gpu), intent(inout) :: x + subroutine s_cuda_multi_set_vect(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_s_base_multivect_type%set_vect(val) call x%set_host() - end subroutine s_gpu_multi_set_vect + end subroutine s_cuda_multi_set_vect -!!$ subroutine s_gpu_multi_scal(alpha, x) +!!$ subroutine s_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ real(psb_spk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_s_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine s_gpu_multi_scal +!!$ end subroutine s_cuda_multi_scal !!$ !!$ -!!$ function s_gpu_multi_nrm2(n,x) result(res) +!!$ function s_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function s_gpu_multi_nrm2 +!!$ end function s_cuda_multi_nrm2 !!$ -!!$ function s_gpu_multi_amax(n,x) result(res) +!!$ function s_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function s_gpu_multi_amax +!!$ end function s_cuda_multi_amax !!$ -!!$ function s_gpu_multi_asum(n,x) result(res) +!!$ function s_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function s_gpu_multi_asum +!!$ end function s_cuda_multi_asum - subroutine s_gpu_multi_all(m,n, x, info) + subroutine s_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_multivect_gpu), intent(out) :: x + class(psb_s_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=szero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_multi_all',& + call psb_errpush(info,'s_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine s_gpu_multi_all + end subroutine s_cuda_multi_all - subroutine s_gpu_multi_zero(x) + subroutine s_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine s_gpu_multi_zero + end subroutine s_cuda_multi_zero - subroutine s_gpu_multi_asb(m,n, x, info) + subroutine s_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine s_gpu_multi_asb + end subroutine s_cuda_multi_asb - subroutine s_gpu_multi_sync_space(x,info) + subroutine s_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine s_gpu_multi_sync_space + end subroutine s_cuda_multi_sync_space - subroutine s_gpu_multi_sync(x) + subroutine s_cuda_multi_sync(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'s_gpu_multi_sync') + call psb_errpush(info,'s_cuda_multi_sync') end if - end subroutine s_gpu_multi_sync + end subroutine s_cuda_multi_sync - subroutine s_gpu_multi_free(x, info) + subroutine s_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine s_gpu_multi_free + end subroutine s_cuda_multi_free - subroutine s_gpu_multi_vect_finalize(x) + subroutine s_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_s_multivect_gpu), intent(inout) :: x + type(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine s_gpu_multi_vect_finalize + end subroutine s_cuda_multi_vect_finalize - subroutine s_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine s_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_spk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_s_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine s_gpu_multi_ins + end subroutine s_cuda_multi_ins #endif -end module psb_s_gpu_multivect_mod +end module psb_s_cuda_multivect_mod diff --git a/cuda/psb_z_csrg_mat_mod.F90 b/cuda/psb_z_csrg_mat_mod.F90 deleted file mode 100644 index 14df1124..00000000 --- a/cuda/psb_z_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_z_csrg_mat_mod - - use iso_c_binding - use psb_z_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_z_csr_sparse_mat) :: psb_z_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(z_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => z_csrg_get_fmt - procedure, pass(a) :: sizeof => z_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_z_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_z_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_csrg_csmm - procedure, pass(a) :: csmv => psb_z_csrg_csmv - procedure, pass(a) :: scals => psb_z_csrg_scals - procedure, pass(a) :: scalv => psb_z_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_z_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_csrg_from_fmt - procedure, pass(a) :: free => z_csrg_free - procedure, pass(a) :: mold => psb_z_csrg_mold - procedure, pass(a) :: is_host => z_csrg_is_host - procedure, pass(a) :: is_dev => z_csrg_is_dev - procedure, pass(a) :: is_sync => z_csrg_is_sync - procedure, pass(a) :: set_host => z_csrg_set_host - procedure, pass(a) :: set_dev => z_csrg_set_dev - procedure, pass(a) :: set_sync => z_csrg_set_sync - procedure, pass(a) :: sync => z_csrg_sync - procedure, pass(a) :: to_gpu => psb_z_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_z_csrg_from_gpu - final :: z_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_csrg_mold -#endif - end type psb_z_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: z_csrg_get_nzeros, z_csrg_free, z_csrg_get_fmt, & - & z_csrg_get_size, z_csrg_sizeof, z_csrg_get_nz_row - - - interface - subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_vect_mv - end interface - - interface - subroutine psb_z_csrg_reallocate_nz(nz,a) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_z_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_z_csrg_reallocate_nz - end interface - - interface - subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_z_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_csrg_allocate_mnnz - end interface - - interface - subroutine psb_z_csrg_mold(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_mold - end interface - - interface - subroutine psb_z_csrg_to_gpu(a,info, nzrm) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_csrg_to_gpu - end interface - - interface - subroutine psb_z_csrg_from_gpu(a,info) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_from_gpu - end interface - - interface - subroutine psb_z_cp_csrg_from_coo(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_csrg_from_coo - end interface - - interface - subroutine psb_z_cp_csrg_from_fmt(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_csrg_from_fmt - end interface - - interface - subroutine psb_z_mv_csrg_from_coo(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_csrg_from_coo - end interface - - interface - subroutine psb_z_mv_csrg_from_fmt(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_csrg_from_fmt - end interface - - interface - subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:) - complex(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_csmv - end interface - interface - subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_csmm - end interface - - interface - subroutine psb_z_csrg_scal(d,a,info,side) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - complex(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_z_csrg_scal - end interface - - interface - subroutine psb_z_csrg_scals(d,a,info) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - complex(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function z_csrg_sizeof(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + (2*psb_sizeof_dp) * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function z_csrg_sizeof - - function z_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function z_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine z_csrg_set_host(a) - implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine z_csrg_set_host - - subroutine z_csrg_set_dev(a) - implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine z_csrg_set_dev - - subroutine z_csrg_set_sync(a) - implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine z_csrg_set_sync - - function z_csrg_is_dev(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function z_csrg_is_dev - - function z_csrg_is_host(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function z_csrg_is_host - - function z_csrg_is_sync(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function z_csrg_is_sync - - - subroutine z_csrg_sync(a) - implicit none - class(psb_z_csrg_sparse_mat), target, intent(in) :: a - class(psb_z_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine z_csrg_sync - - subroutine z_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_z_csr_sparse_mat%free() - - return - - end subroutine z_csrg_free - - subroutine z_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_z_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine z_csrg_finalize - -#else - interface - subroutine psb_z_csrg_mold(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_mold - end interface - -#endif - -end module psb_z_csrg_mat_mod diff --git a/cuda/psb_z_cuda_csrg_mat_mod.F90 b/cuda/psb_z_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..75170185 --- /dev/null +++ b/cuda/psb_z_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_csrg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_csr_sparse_mat) :: psb_z_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(z_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_z_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_z_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => z_cuda_csrg_free + procedure, pass(a) :: mold => psb_z_cuda_csrg_mold + procedure, pass(a) :: is_host => z_cuda_csrg_is_host + procedure, pass(a) :: is_dev => z_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => z_cuda_csrg_is_sync + procedure, pass(a) :: set_host => z_cuda_csrg_set_host + procedure, pass(a) :: set_dev => z_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => z_cuda_csrg_set_sync + procedure, pass(a) :: sync => z_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_z_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_z_cuda_csrg_from_gpu + final :: z_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_cuda_csrg_mold +#endif + end type psb_z_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_cuda_csrg_get_nzeros, z_cuda_csrg_free, z_cuda_csrg_get_fmt, & + & z_cuda_csrg_get_size, z_cuda_csrg_sizeof, z_cuda_csrg_get_nz_row + + + interface + subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_csrg_mold(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_mold + end interface + + interface + subroutine psb_z_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_z_cuda_csrg_from_gpu(a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmv + end interface + interface + subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmm + end interface + + interface + subroutine psb_z_cuda_csrg_scal(d,a,info,side) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_csrg_scal + end interface + + interface + subroutine psb_z_cuda_csrg_scals(d,a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_csrg_sizeof + + function z_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function z_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine z_cuda_csrg_set_host(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_cuda_csrg_set_host + + subroutine z_cuda_csrg_set_dev(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_cuda_csrg_set_dev + + subroutine z_cuda_csrg_set_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_cuda_csrg_set_sync + + function z_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_cuda_csrg_is_dev + + function z_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_cuda_csrg_is_host + + function z_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_cuda_csrg_is_sync + + + subroutine z_cuda_csrg_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_cuda_csrg_sync + + subroutine z_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_cuda_csrg_free + + subroutine z_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine z_cuda_csrg_finalize + +#else + interface + subroutine psb_z_cuda_csrg_mold(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_mold + end interface + +#endif + +end module psb_z_cuda_csrg_mat_mod diff --git a/cuda/psb_z_diag_mat_mod.F90 b/cuda/psb_z_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_z_diag_mat_mod.F90 rename to cuda/psb_z_cuda_diag_mat_mod.F90 index 986d75d9..80906778 100644 --- a/cuda/psb_z_diag_mat_mod.F90 +++ b/cuda/psb_z_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_z_diag_mat_mod +module psb_z_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_z_dia_mat_mod - type, extends(psb_z_dia_sparse_mat) :: psb_z_diag_sparse_mat + type, extends(psb_z_dia_sparse_mat) :: psb_z_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_z_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => z_diag_get_fmt - procedure, pass(a) :: sizeof => z_diag_sizeof - procedure, pass(a) :: vect_mv => psb_z_diag_vect_mv -! procedure, pass(a) :: csmm => psb_z_diag_csmm - procedure, pass(a) :: csmv => psb_z_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_z_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_z_diag_scals -! procedure, pass(a) :: scalv => psb_z_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_z_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_z_diag_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => z_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_z_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_z_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_z_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_z_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_z_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_z_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_z_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_z_mv_diag_from_fmt - procedure, pass(a) :: free => z_diag_free - procedure, pass(a) :: mold => psb_z_diag_mold - procedure, pass(a) :: to_gpu => psb_z_diag_to_gpu - final :: z_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => z_cuda_diag_free + procedure, pass(a) :: mold => psb_z_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_diag_to_gpu + final :: z_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_z_diag_mold + procedure, pass(a) :: mold => psb_z_cuda_diag_mold #endif - end type psb_z_diag_sparse_mat + end type psb_z_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: z_diag_get_nzeros, z_diag_free, z_diag_get_fmt, & - & z_diag_get_size, z_diag_sizeof, z_diag_get_nz_row + private :: z_cuda_diag_get_nzeros, z_cuda_diag_free, z_cuda_diag_get_fmt, & + & z_cuda_diag_get_size, z_cuda_diag_sizeof, z_cuda_diag_get_nz_row interface - subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_vect_mv + end subroutine psb_z_cuda_diag_vect_mv end interface interface - subroutine psb_z_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_inner_vect_sv + end subroutine psb_z_cuda_diag_inner_vect_sv end interface interface - subroutine psb_z_diag_reallocate_nz(nz,a) - import :: psb_z_diag_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_diag_reallocate_nz(nz,a) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_diag_sparse_mat), intent(inout) :: a - end subroutine psb_z_diag_reallocate_nz + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_diag_reallocate_nz end interface interface - subroutine psb_z_diag_allocate_mnnz(m,n,a,nz) - import :: psb_z_diag_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_diag_allocate_mnnz + end subroutine psb_z_cuda_diag_allocate_mnnz end interface interface - subroutine psb_z_diag_mold(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_mold(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_diag_mold + end subroutine psb_z_cuda_diag_mold end interface interface - subroutine psb_z_diag_to_gpu(a,info, nzrm) - import :: psb_z_diag_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_diag_to_gpu + end subroutine psb_z_cuda_diag_to_gpu end interface interface - subroutine psb_z_cp_diag_from_coo(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_diag_from_coo + end subroutine psb_z_cuda_cp_diag_from_coo end interface interface - subroutine psb_z_cp_diag_from_fmt(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_diag_from_fmt + end subroutine psb_z_cuda_cp_diag_from_fmt end interface interface - subroutine psb_z_mv_diag_from_coo(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_diag_from_coo + end subroutine psb_z_cuda_mv_diag_from_coo end interface interface - subroutine psb_z_mv_diag_from_fmt(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_diag_from_fmt + end subroutine psb_z_cuda_mv_diag_from_fmt end interface interface - subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_csmv + end subroutine psb_z_cuda_diag_csmv end interface interface - subroutine psb_z_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_csmm + end subroutine psb_z_cuda_diag_csmm end interface interface - subroutine psb_z_diag_scal(d,a,info, side) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_diag_scal(d,a,info, side) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_diag_scal + end subroutine psb_z_cuda_diag_scal end interface interface - subroutine psb_z_diag_scals(d,a,info) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_diag_scals(d,a,info) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_diag_scals + end subroutine psb_z_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function z_diag_sizeof(a) result(res) + function z_cuda_diag_sizeof(a) result(res) implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_diag_sizeof + end function z_cuda_diag_sizeof - function z_diag_get_fmt() result(res) + function z_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function z_diag_get_fmt + end function z_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine z_diag_free(a) + subroutine z_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine z_diag_free + end subroutine z_cuda_diag_free - subroutine z_diag_finalize(a) + subroutine z_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_z_diag_sparse_mat), intent(inout) :: a + type(psb_z_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_diag_finalize + end subroutine z_cuda_diag_finalize #else interface - subroutine psb_z_diag_mold(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_mold(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_diag_mold + end subroutine psb_z_cuda_diag_mold end interface #endif -end module psb_z_diag_mat_mod +end module psb_z_cuda_diag_mat_mod diff --git a/cuda/psb_z_dnsg_mat_mod.F90 b/cuda/psb_z_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_z_dnsg_mat_mod.F90 rename to cuda/psb_z_cuda_dnsg_mat_mod.F90 index 6a3d4369..3fb2488b 100644 --- a/cuda/psb_z_dnsg_mat_mod.F90 +++ b/cuda/psb_z_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_z_dnsg_mat_mod +module psb_z_cuda_dnsg_mat_mod use iso_c_binding use psb_z_mat_mod use psb_z_dns_mat_mod use dnsdev_mod - type, extends(psb_z_dns_sparse_mat) :: psb_z_dnsg_sparse_mat + type, extends(psb_z_dns_sparse_mat) :: psb_z_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_z_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => z_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => z_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_z_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_z_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_z_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_z_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_z_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_z_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_z_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_z_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => z_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_z_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_z_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_z_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_z_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_z_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_z_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_z_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_z_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_dnsg_from_fmt - procedure, pass(a) :: free => z_dnsg_free - procedure, pass(a) :: mold => psb_z_dnsg_mold - procedure, pass(a) :: to_gpu => psb_z_dnsg_to_gpu - final :: z_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => z_cuda_dnsg_free + procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_dnsg_to_gpu + final :: z_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_z_dnsg_mold + procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold #endif - end type psb_z_dnsg_sparse_mat + end type psb_z_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: z_dnsg_get_nzeros, z_dnsg_free, z_dnsg_get_fmt, & - & z_dnsg_get_size, z_dnsg_get_nz_row + private :: z_cuda_dnsg_get_nzeros, z_cuda_dnsg_free, z_cuda_dnsg_get_fmt, & + & z_cuda_dnsg_get_size, z_cuda_dnsg_get_nz_row interface - subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_dnsg_vect_mv + end subroutine psb_z_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_z_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_z_dnsg_mold(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_dnsg_mold(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_dnsg_mold + end subroutine psb_z_cuda_dnsg_mold end interface interface - subroutine psb_z_dnsg_to_gpu(a,info) - import :: psb_z_dnsg_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_dnsg_to_gpu(a,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_dnsg_to_gpu + end subroutine psb_z_cuda_dnsg_to_gpu end interface interface - subroutine psb_z_cp_dnsg_from_coo(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_dnsg_from_coo + end subroutine psb_z_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_z_cp_dnsg_from_fmt(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_dnsg_from_fmt + end subroutine psb_z_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_z_mv_dnsg_from_coo(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_dnsg_from_coo + end subroutine psb_z_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_z_mv_dnsg_from_fmt(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_dnsg_from_fmt + end subroutine psb_z_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_z_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) !!$ complex(psb_dpk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_csmv +!!$ end subroutine psb_z_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_z_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_csmm +!!$ end subroutine psb_z_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_scal(d,a,info, side) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_z_dnsg_scal +!!$ end subroutine psb_z_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_scals(d,a,info) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_dnsg_scals +!!$ end subroutine psb_z_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function z_dnsg_get_fmt() result(res) + function z_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function z_dnsg_get_fmt + end function z_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine z_dnsg_free(a) + subroutine z_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine z_dnsg_free + end subroutine z_cuda_dnsg_free - subroutine z_dnsg_finalize(a) + subroutine z_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_z_dnsg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_dnsg_finalize + end subroutine z_cuda_dnsg_finalize #else interface - subroutine psb_z_dnsg_mold(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_dnsg_mold(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_dnsg_mold + end subroutine psb_z_cuda_dnsg_mold end interface #endif -end module psb_z_dnsg_mat_mod +end module psb_z_cuda_dnsg_mat_mod diff --git a/cuda/psb_z_elg_mat_mod.F90 b/cuda/psb_z_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_z_elg_mat_mod.F90 rename to cuda/psb_z_cuda_elg_mat_mod.F90 index cf9e479c..9090b0a2 100644 --- a/cuda/psb_z_elg_mat_mod.F90 +++ b/cuda/psb_z_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_z_elg_mat_mod +module psb_z_cuda_elg_mat_mod use iso_c_binding use psb_z_mat_mod use psb_z_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_ell_sparse_mat) :: psb_z_elg_sparse_mat + type, extends(psb_z_ell_sparse_mat) :: psb_z_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_z_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => z_elg_get_fmt - procedure, pass(a) :: sizeof => z_elg_sizeof - procedure, pass(a) :: vect_mv => psb_z_elg_vect_mv - procedure, pass(a) :: csmm => psb_z_elg_csmm - procedure, pass(a) :: csmv => psb_z_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_z_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_z_elg_scals - procedure, pass(a) :: scalv => psb_z_elg_scal - procedure, pass(a) :: reallocate_nz => psb_z_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_elg_allocate_mnnz - procedure, pass(a) :: reinit => z_elg_reinit + procedure, nopass :: get_fmt => z_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_z_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_cuda_elg_scals + procedure, pass(a) :: scalv => psb_z_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => z_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_elg_from_fmt - procedure, pass(a) :: free => z_elg_free - procedure, pass(a) :: mold => psb_z_elg_mold - procedure, pass(a) :: csput_a => psb_z_elg_csput_a - procedure, pass(a) :: csput_v => psb_z_elg_csput_v - procedure, pass(a) :: is_host => z_elg_is_host - procedure, pass(a) :: is_dev => z_elg_is_dev - procedure, pass(a) :: is_sync => z_elg_is_sync - procedure, pass(a) :: set_host => z_elg_set_host - procedure, pass(a) :: set_dev => z_elg_set_dev - procedure, pass(a) :: set_sync => z_elg_set_sync - procedure, pass(a) :: sync => z_elg_sync - procedure, pass(a) :: from_gpu => psb_z_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_z_elg_to_gpu - procedure, pass(a) :: asb => psb_z_elg_asb - final :: z_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => z_cuda_elg_free + procedure, pass(a) :: mold => psb_z_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_z_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_z_cuda_elg_csput_v + procedure, pass(a) :: is_host => z_cuda_elg_is_host + procedure, pass(a) :: is_dev => z_cuda_elg_is_dev + procedure, pass(a) :: is_sync => z_cuda_elg_is_sync + procedure, pass(a) :: set_host => z_cuda_elg_set_host + procedure, pass(a) :: set_dev => z_cuda_elg_set_dev + procedure, pass(a) :: set_sync => z_cuda_elg_set_sync + procedure, pass(a) :: sync => z_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_z_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_z_cuda_elg_asb + final :: z_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_z_elg_mold - procedure, pass(a) :: asb => psb_z_elg_asb + procedure, pass(a) :: mold => psb_z_cuda_elg_mold + procedure, pass(a) :: asb => psb_z_cuda_elg_asb #endif - end type psb_z_elg_sparse_mat + end type psb_z_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: z_elg_get_nzeros, z_elg_free, z_elg_get_fmt, & - & z_elg_get_size, z_elg_sizeof, z_elg_get_nz_row, z_elg_sync + private :: z_cuda_elg_get_nzeros, z_cuda_elg_free, z_cuda_elg_get_fmt, & + & z_cuda_elg_get_size, z_cuda_elg_sizeof, z_cuda_elg_get_nz_row, z_cuda_elg_sync interface - subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_vect_mv + end subroutine psb_z_cuda_elg_vect_mv end interface interface - subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_inner_vect_sv + end subroutine psb_z_cuda_elg_inner_vect_sv end interface interface - subroutine psb_z_elg_reallocate_nz(nz,a) - import :: psb_z_elg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_elg_reallocate_nz(nz,a) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_elg_reallocate_nz + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_reallocate_nz end interface interface - subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) - import :: psb_z_elg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_elg_allocate_mnnz + end subroutine psb_z_cuda_elg_allocate_mnnz end interface interface - subroutine psb_z_elg_mold(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_mold(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_mold + end subroutine psb_z_cuda_elg_mold end interface interface - subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_csput_a + end subroutine psb_z_cuda_elg_csput_a end interface interface - subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,& + subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,& & psb_i_base_vect_type - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_csput_v + end subroutine psb_z_cuda_elg_csput_v end interface interface - subroutine psb_z_elg_from_gpu(a,info) - import :: psb_z_elg_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_from_gpu(a,info) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_from_gpu + end subroutine psb_z_cuda_elg_from_gpu end interface interface - subroutine psb_z_elg_to_gpu(a,info, nzrm) - import :: psb_z_elg_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_elg_to_gpu + end subroutine psb_z_cuda_elg_to_gpu end interface interface - subroutine psb_z_cp_elg_from_coo(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_elg_from_coo + end subroutine psb_z_cuda_cp_elg_from_coo end interface interface - subroutine psb_z_cp_elg_from_fmt(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_elg_from_fmt + end subroutine psb_z_cuda_cp_elg_from_fmt end interface interface - subroutine psb_z_mv_elg_from_coo(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_elg_from_coo + end subroutine psb_z_cuda_mv_elg_from_coo end interface interface - subroutine psb_z_mv_elg_from_fmt(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_elg_from_fmt + end subroutine psb_z_cuda_mv_elg_from_fmt end interface interface - subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_csmv + end subroutine psb_z_cuda_elg_csmv end interface interface - subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_csmm + end subroutine psb_z_cuda_elg_csmm end interface interface - subroutine psb_z_elg_scal(d,a,info, side) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_scal(d,a,info, side) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_elg_scal + end subroutine psb_z_cuda_elg_scal end interface interface - subroutine psb_z_elg_scals(d,a,info) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_scals(d,a,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_scals + end subroutine psb_z_cuda_elg_scals end interface interface - subroutine psb_z_elg_asb(a) - import :: psb_z_elg_sparse_mat - class(psb_z_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_elg_asb + subroutine psb_z_cuda_elg_asb(a) + import :: psb_z_cuda_elg_sparse_mat + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function z_elg_sizeof(a) result(res) + function z_cuda_elg_sizeof(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_elg_sizeof + end function z_cuda_elg_sizeof - function z_elg_get_fmt() result(res) + function z_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function z_elg_get_fmt + end function z_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine z_elg_reinit(a,clear) + subroutine z_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine z_elg_reinit + end subroutine z_cuda_elg_reinit - subroutine z_elg_free(a) + subroutine z_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine z_elg_free + end subroutine z_cuda_elg_free - subroutine z_elg_sync(a) + subroutine z_cuda_elg_sync(a) implicit none - class(psb_z_elg_sparse_mat), target, intent(in) :: a - class(psb_z_elg_sparse_mat), pointer :: tmpa + class(psb_z_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine z_elg_sync + end subroutine z_cuda_elg_sync - subroutine z_elg_set_host(a) + subroutine z_cuda_elg_set_host(a) implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine z_elg_set_host + end subroutine z_cuda_elg_set_host - subroutine z_elg_set_dev(a) + subroutine z_cuda_elg_set_dev(a) implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine z_elg_set_dev + end subroutine z_cuda_elg_set_dev - subroutine z_elg_set_sync(a) + subroutine z_cuda_elg_set_sync(a) implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine z_elg_set_sync + end subroutine z_cuda_elg_set_sync - function z_elg_is_dev(a) result(res) + function z_cuda_elg_is_dev(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function z_elg_is_dev + end function z_cuda_elg_is_dev - function z_elg_is_host(a) result(res) + function z_cuda_elg_is_host(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function z_elg_is_host + end function z_cuda_elg_is_host - function z_elg_is_sync(a) result(res) + function z_cuda_elg_is_sync(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function z_elg_is_sync + end function z_cuda_elg_is_sync - subroutine z_elg_finalize(a) + subroutine z_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_z_elg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_elg_finalize + end subroutine z_cuda_elg_finalize #else interface - subroutine psb_z_elg_asb(a) - import :: psb_z_elg_sparse_mat - class(psb_z_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_elg_asb + subroutine psb_z_cuda_elg_asb(a) + import :: psb_z_cuda_elg_sparse_mat + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_asb end interface interface - subroutine psb_z_elg_mold(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_mold(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_mold + end subroutine psb_z_cuda_elg_mold end interface #endif -end module psb_z_elg_mat_mod +end module psb_z_cuda_elg_mat_mod diff --git a/cuda/psb_z_hdiag_mat_mod.F90 b/cuda/psb_z_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_z_hdiag_mat_mod.F90 rename to cuda/psb_z_cuda_hdiag_mat_mod.F90 index 0b61cb47..b64498f6 100644 --- a/cuda/psb_z_hdiag_mat_mod.F90 +++ b/cuda/psb_z_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_z_hdiag_mat_mod +module psb_z_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_z_hdia_mat_mod - type, extends(psb_z_hdia_sparse_mat) :: psb_z_hdiag_sparse_mat + type, extends(psb_z_hdia_sparse_mat) :: psb_z_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => z_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => z_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_z_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_z_hdiag_csmm - procedure, pass(a) :: csmv => psb_z_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_z_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_z_hdiag_scals - ! procedure, pass(a) :: scalv => psb_z_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_z_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_z_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => z_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_z_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_z_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_z_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_z_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_z_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_z_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_hdiag_from_fmt - procedure, pass(a) :: free => z_hdiag_free - procedure, pass(a) :: mold => psb_z_hdiag_mold - procedure, pass(a) :: to_gpu => psb_z_hdiag_to_gpu - final :: z_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => z_cuda_hdiag_free + procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_hdiag_to_gpu + final :: z_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_z_hdiag_mold + procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold #endif - end type psb_z_hdiag_sparse_mat + end type psb_z_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: z_hdiag_get_nzeros, z_hdiag_free, z_hdiag_get_fmt, & - & z_hdiag_get_size, z_hdiag_sizeof, z_hdiag_get_nz_row + private :: z_cuda_hdiag_get_nzeros, z_cuda_hdiag_free, z_cuda_hdiag_get_fmt, & + & z_cuda_hdiag_get_size, z_cuda_hdiag_sizeof, z_cuda_hdiag_get_nz_row interface - subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hdiag_vect_mv + end subroutine psb_z_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_z_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type -!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_z_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_hdiag_inner_vect_sv +!!$ end subroutine psb_z_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_z_hdiag_reallocate_nz +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_z_hdiag_allocate_mnnz +!!$ end subroutine psb_z_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_z_hdiag_mold(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_mold(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hdiag_mold + end subroutine psb_z_cuda_hdiag_mold end interface interface - subroutine psb_z_hdiag_to_gpu(a,info) - import :: psb_z_hdiag_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hdiag_to_gpu(a,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hdiag_to_gpu + end subroutine psb_z_cuda_hdiag_to_gpu end interface interface - subroutine psb_z_cp_hdiag_from_coo(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hdiag_from_coo + end subroutine psb_z_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_z_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_z_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_cp_hdiag_from_fmt +!!$ end subroutine psb_z_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_z_mv_hdiag_from_coo(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hdiag_from_coo + end subroutine psb_z_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_z_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_z_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_mv_hdiag_from_fmt +!!$ end subroutine psb_z_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hdiag_csmv + end subroutine psb_z_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_z_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_hdiag_csmm +!!$ end subroutine psb_z_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_scal(d,a,info, side) -!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_z_hdiag_scal +!!$ end subroutine psb_z_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_scals(d,a,info) -!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_hdiag_scals +!!$ end subroutine psb_z_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function z_hdiag_get_fmt() result(res) + function z_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function z_hdiag_get_fmt + end function z_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine z_hdiag_free(a) + subroutine z_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine z_hdiag_free + end subroutine z_cuda_hdiag_free - subroutine z_hdiag_finalize(a) + subroutine z_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_z_hdiag_sparse_mat), intent(inout) :: a + type(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_z_hdia_sparse_mat%free() return - end subroutine z_hdiag_finalize + end subroutine z_cuda_hdiag_finalize #else interface - subroutine psb_z_hdiag_mold(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_mold(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hdiag_mold + end subroutine psb_z_cuda_hdiag_mold end interface #endif -end module psb_z_hdiag_mat_mod +end module psb_z_cuda_hdiag_mat_mod diff --git a/cuda/psb_z_hlg_mat_mod.F90 b/cuda/psb_z_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_z_hlg_mat_mod.F90 rename to cuda/psb_z_cuda_hlg_mat_mod.F90 index 09d490b3..29ed68fa 100644 --- a/cuda/psb_z_hlg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_z_hlg_mat_mod +module psb_z_cuda_hlg_mat_mod use iso_c_binding use psb_z_mat_mod @@ -41,7 +41,7 @@ module psb_z_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_hll_sparse_mat) :: psb_z_hlg_sparse_mat + type, extends(psb_z_hll_sparse_mat) :: psb_z_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_z_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => z_hlg_get_fmt - procedure, pass(a) :: sizeof => z_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_z_hlg_vect_mv - procedure, pass(a) :: csmm => psb_z_hlg_csmm - procedure, pass(a) :: csmv => psb_z_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_z_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_z_hlg_scals - procedure, pass(a) :: scalv => psb_z_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_z_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_hlg_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_z_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_z_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_hlg_from_fmt - procedure, pass(a) :: free => z_hlg_free - procedure, pass(a) :: mold => psb_z_hlg_mold - procedure, pass(a) :: is_host => z_hlg_is_host - procedure, pass(a) :: is_dev => z_hlg_is_dev - procedure, pass(a) :: is_sync => z_hlg_is_sync - procedure, pass(a) :: set_host => z_hlg_set_host - procedure, pass(a) :: set_dev => z_hlg_set_dev - procedure, pass(a) :: set_sync => z_hlg_set_sync - procedure, pass(a) :: sync => z_hlg_sync - procedure, pass(a) :: from_gpu => psb_z_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_z_hlg_to_gpu - final :: z_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => z_cuda_hlg_free + procedure, pass(a) :: mold => psb_z_cuda_hlg_mold + procedure, pass(a) :: is_host => z_cuda_hlg_is_host + procedure, pass(a) :: is_dev => z_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => z_cuda_hlg_is_sync + procedure, pass(a) :: set_host => z_cuda_hlg_set_host + procedure, pass(a) :: set_dev => z_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => z_cuda_hlg_set_sync + procedure, pass(a) :: sync => z_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_z_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_cuda_hlg_to_gpu + final :: z_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_z_hlg_mold + procedure, pass(a) :: mold => psb_z_cuda_hlg_mold #endif - end type psb_z_hlg_sparse_mat + end type psb_z_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: z_hlg_get_nzeros, z_hlg_free, z_hlg_get_fmt, & - & z_hlg_get_size, z_hlg_sizeof, z_hlg_get_nz_row + private :: z_cuda_hlg_get_nzeros, z_cuda_hlg_free, z_cuda_hlg_get_fmt, & + & z_cuda_hlg_get_size, z_cuda_hlg_sizeof, z_cuda_hlg_get_nz_row interface - subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_vect_mv + end subroutine psb_z_cuda_hlg_vect_mv end interface interface - subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_inner_vect_sv + end subroutine psb_z_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_z_hlg_reallocate_nz(nz,a) - import :: psb_z_hlg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_z_hlg_reallocate_nz + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_hlg_reallocate_nz end interface interface - subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_z_hlg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_hlg_allocate_mnnz + end subroutine psb_z_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_z_hlg_mold(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_mold(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_mold + end subroutine psb_z_cuda_hlg_mold end interface interface - subroutine psb_z_hlg_from_gpu(a,info) - import :: psb_z_hlg_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_from_gpu(a,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_from_gpu + end subroutine psb_z_cuda_hlg_from_gpu end interface interface - subroutine psb_z_hlg_to_gpu(a,info, nzrm) - import :: psb_z_hlg_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_hlg_to_gpu + end subroutine psb_z_cuda_hlg_to_gpu end interface interface - subroutine psb_z_cp_hlg_from_coo(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hlg_from_coo + end subroutine psb_z_cuda_cp_hlg_from_coo end interface interface - subroutine psb_z_cp_hlg_from_fmt(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hlg_from_fmt + end subroutine psb_z_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_z_mv_hlg_from_coo(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hlg_from_coo + end subroutine psb_z_cuda_mv_hlg_from_coo end interface interface - subroutine psb_z_mv_hlg_from_fmt(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hlg_from_fmt + end subroutine psb_z_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_csmv + end subroutine psb_z_cuda_hlg_csmv end interface interface - subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_csmm + end subroutine psb_z_cuda_hlg_csmm end interface interface - subroutine psb_z_hlg_scal(d,a,info, side) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_scal(d,a,info, side) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_hlg_scal + end subroutine psb_z_cuda_hlg_scal end interface interface - subroutine psb_z_hlg_scals(d,a,info) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_scals(d,a,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_scals + end subroutine psb_z_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function z_hlg_sizeof(a) result(res) + function z_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_hlg_sizeof + end function z_cuda_hlg_sizeof - function z_hlg_get_fmt() result(res) + function z_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function z_hlg_get_fmt + end function z_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine z_hlg_free(a) + subroutine z_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine z_hlg_free + end subroutine z_cuda_hlg_free - subroutine z_hlg_sync(a) + subroutine z_cuda_hlg_sync(a) implicit none - class(psb_z_hlg_sparse_mat), target, intent(in) :: a - class(psb_z_hlg_sparse_mat), pointer :: tmpa + class(psb_z_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine z_hlg_sync + end subroutine z_cuda_hlg_sync - subroutine z_hlg_set_host(a) + subroutine z_cuda_hlg_set_host(a) implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine z_hlg_set_host + end subroutine z_cuda_hlg_set_host - subroutine z_hlg_set_dev(a) + subroutine z_cuda_hlg_set_dev(a) implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine z_hlg_set_dev + end subroutine z_cuda_hlg_set_dev - subroutine z_hlg_set_sync(a) + subroutine z_cuda_hlg_set_sync(a) implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine z_hlg_set_sync + end subroutine z_cuda_hlg_set_sync - function z_hlg_is_dev(a) result(res) + function z_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function z_hlg_is_dev + end function z_cuda_hlg_is_dev - function z_hlg_is_host(a) result(res) + function z_cuda_hlg_is_host(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function z_hlg_is_host + end function z_cuda_hlg_is_host - function z_hlg_is_sync(a) result(res) + function z_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function z_hlg_is_sync + end function z_cuda_hlg_is_sync - subroutine z_hlg_finalize(a) + subroutine z_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_z_hlg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_hlg_finalize + end subroutine z_cuda_hlg_finalize #else interface - subroutine psb_z_hlg_mold(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_mold(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_mold + end subroutine psb_z_cuda_hlg_mold end interface #endif -end module psb_z_hlg_mat_mod +end module psb_z_cuda_hlg_mat_mod diff --git a/cuda/psb_z_hybg_mat_mod.F90 b/cuda/psb_z_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_z_hybg_mat_mod.F90 rename to cuda/psb_z_cuda_hybg_mat_mod.F90 index 465677e3..1bbc11b2 100644 --- a/cuda/psb_z_hybg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_z_hybg_mat_mod +module psb_z_cuda_hybg_mat_mod use iso_c_binding use psb_z_mat_mod use cusparse_mod - type, extends(psb_z_csr_sparse_mat) :: psb_z_hybg_sparse_mat + type, extends(psb_z_csr_sparse_mat) :: psb_z_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_z_hybg_mat_mod type(z_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => z_hybg_get_fmt - procedure, pass(a) :: sizeof => z_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_z_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_z_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_hybg_csmm - procedure, pass(a) :: csmv => psb_z_hybg_csmv - procedure, pass(a) :: scals => psb_z_hybg_scals - procedure, pass(a) :: scalv => psb_z_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_z_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_hybg_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_z_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_z_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_hybg_from_fmt - procedure, pass(a) :: free => z_hybg_free - procedure, pass(a) :: mold => psb_z_hybg_mold - procedure, pass(a) :: to_gpu => psb_z_hybg_to_gpu - final :: z_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => z_cuda_hybg_free + procedure, pass(a) :: mold => psb_z_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_hybg_to_gpu + final :: z_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_z_hybg_mold + procedure, pass(a) :: mold => psb_z_cuda_hybg_mold #endif - end type psb_z_hybg_sparse_mat + end type psb_z_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: z_hybg_get_nzeros, z_hybg_free, z_hybg_get_fmt, & - & z_hybg_get_size, z_hybg_sizeof, z_hybg_get_nz_row + private :: z_cuda_hybg_get_nzeros, z_cuda_hybg_free, z_cuda_hybg_get_fmt, & + & z_cuda_hybg_get_size, z_cuda_hybg_sizeof, z_cuda_hybg_get_nz_row interface - subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_inner_vect_sv + end subroutine psb_z_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_vect_mv + end subroutine psb_z_cuda_hybg_vect_mv end interface interface - subroutine psb_z_hybg_reallocate_nz(nz,a) - import :: psb_z_hybg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_z_hybg_reallocate_nz + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_hybg_reallocate_nz end interface interface - subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_z_hybg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_hybg_allocate_mnnz + end subroutine psb_z_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_z_hybg_mold(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_mold(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hybg_mold + end subroutine psb_z_cuda_hybg_mold end interface interface - subroutine psb_z_hybg_to_gpu(a,info, nzrm) - import :: psb_z_hybg_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_hybg_to_gpu + end subroutine psb_z_cuda_hybg_to_gpu end interface interface - subroutine psb_z_cp_hybg_from_coo(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hybg_from_coo + end subroutine psb_z_cuda_cp_hybg_from_coo end interface interface - subroutine psb_z_cp_hybg_from_fmt(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hybg_from_fmt + end subroutine psb_z_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_z_mv_hybg_from_coo(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hybg_from_coo + end subroutine psb_z_cuda_mv_hybg_from_coo end interface interface - subroutine psb_z_mv_hybg_from_fmt(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hybg_from_fmt + end subroutine psb_z_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_csmv + end subroutine psb_z_cuda_hybg_csmv end interface interface - subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_csmm + end subroutine psb_z_cuda_hybg_csmm end interface interface - subroutine psb_z_hybg_scal(d,a,info,side) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hybg_scal(d,a,info,side) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_hybg_scal + end subroutine psb_z_cuda_hybg_scal end interface interface - subroutine psb_z_hybg_scals(d,a,info) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hybg_scals(d,a,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hybg_scals + end subroutine psb_z_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function z_hybg_sizeof(a) result(res) + function z_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + (2*psb_sizeof_dp) * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_hybg_sizeof + end function z_cuda_hybg_sizeof - function z_hybg_get_fmt() result(res) + function z_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function z_hybg_get_fmt + end function z_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine z_hybg_free(a) + subroutine z_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_z_csr_sparse_mat%free() return - end subroutine z_hybg_free + end subroutine z_cuda_hybg_free - subroutine z_hybg_finalize(a) + subroutine z_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_z_hybg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine z_hybg_finalize + end subroutine z_cuda_hybg_finalize #else interface - subroutine psb_z_hybg_mold(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_mold(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hybg_mold + end subroutine psb_z_cuda_hybg_mold end interface #endif -end module psb_z_hybg_mat_mod +end module psb_z_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_z_gpu_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_z_gpu_vect_mod.F90 rename to cuda/psb_z_cuda_vect_mod.F90 index ca5ac922..35bfb4b5 100644 --- a/cuda/psb_z_gpu_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_z_gpu_vect_mod +module psb_z_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_z_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_z_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_z_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_base_vect_type) :: psb_z_vect_gpu + type, extends(psb_z_base_vect_type) :: psb_z_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_z_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => z_gpu_get_nrows - procedure, nopass :: get_fmt => z_gpu_get_fmt - - procedure, pass(x) :: all => z_gpu_all - procedure, pass(x) :: zero => z_gpu_zero - procedure, pass(x) :: asb_m => z_gpu_asb_m - procedure, pass(x) :: sync => z_gpu_sync - procedure, pass(x) :: sync_space => z_gpu_sync_space - procedure, pass(x) :: bld_x => z_gpu_bld_x - procedure, pass(x) :: bld_mn => z_gpu_bld_mn - procedure, pass(x) :: free => z_gpu_free - procedure, pass(x) :: ins_a => z_gpu_ins_a - procedure, pass(x) :: ins_v => z_gpu_ins_v - procedure, pass(x) :: is_host => z_gpu_is_host - procedure, pass(x) :: is_dev => z_gpu_is_dev - procedure, pass(x) :: is_sync => z_gpu_is_sync - procedure, pass(x) :: set_host => z_gpu_set_host - procedure, pass(x) :: set_dev => z_gpu_set_dev - procedure, pass(x) :: set_sync => z_gpu_set_sync - procedure, pass(x) :: set_scal => z_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => z_gpu_set_vect - procedure, pass(x) :: gthzv_x => z_gpu_gthzv_x - procedure, pass(y) :: sctb => z_gpu_sctb - procedure, pass(y) :: sctb_x => z_gpu_sctb_x - procedure, pass(x) :: gthzbuf => z_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => z_gpu_sctb_buf - procedure, pass(x) :: new_buffer => z_gpu_new_buffer - procedure, nopass :: device_wait => z_gpu_device_wait - procedure, pass(x) :: free_buffer => z_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => z_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => z_gpu_dot_v - procedure, pass(x) :: dot_a => z_gpu_dot_a - procedure, pass(y) :: axpby_v => z_gpu_axpby_v - procedure, pass(y) :: axpby_a => z_gpu_axpby_a - procedure, pass(y) :: mlt_v => z_gpu_mlt_v - procedure, pass(y) :: mlt_a => z_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => z_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => z_gpu_mlt_v_2 - procedure, pass(x) :: scal => z_gpu_scal - procedure, pass(x) :: nrm2 => z_gpu_nrm2 - procedure, pass(x) :: amax => z_gpu_amax - procedure, pass(x) :: asum => z_gpu_asum - procedure, pass(x) :: absval1 => z_gpu_absval1 - procedure, pass(x) :: absval2 => z_gpu_absval2 - - final :: z_gpu_vect_finalize + procedure, pass(x) :: get_nrows => z_cuda_get_nrows + procedure, nopass :: get_fmt => z_cuda_get_fmt + + procedure, pass(x) :: all => z_cuda_all + procedure, pass(x) :: zero => z_cuda_zero + procedure, pass(x) :: asb_m => z_cuda_asb_m + procedure, pass(x) :: sync => z_cuda_sync + procedure, pass(x) :: sync_space => z_cuda_sync_space + procedure, pass(x) :: bld_x => z_cuda_bld_x + procedure, pass(x) :: bld_mn => z_cuda_bld_mn + procedure, pass(x) :: free => z_cuda_free + procedure, pass(x) :: ins_a => z_cuda_ins_a + procedure, pass(x) :: ins_v => z_cuda_ins_v + procedure, pass(x) :: is_host => z_cuda_is_host + procedure, pass(x) :: is_dev => z_cuda_is_dev + procedure, pass(x) :: is_sync => z_cuda_is_sync + procedure, pass(x) :: set_host => z_cuda_set_host + procedure, pass(x) :: set_dev => z_cuda_set_dev + procedure, pass(x) :: set_sync => z_cuda_set_sync + procedure, pass(x) :: set_scal => z_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => z_cuda_set_vect + procedure, pass(x) :: gthzv_x => z_cuda_gthzv_x + procedure, pass(y) :: sctb => z_cuda_sctb + procedure, pass(y) :: sctb_x => z_cuda_sctb_x + procedure, pass(x) :: gthzbuf => z_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => z_cuda_sctb_buf + procedure, pass(x) :: new_buffer => z_cuda_new_buffer + procedure, nopass :: device_wait => z_cuda_device_wait + procedure, pass(x) :: free_buffer => z_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => z_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => z_cuda_dot_v + procedure, pass(x) :: dot_a => z_cuda_dot_a + procedure, pass(y) :: axpby_v => z_cuda_axpby_v + procedure, pass(y) :: axpby_a => z_cuda_axpby_a + procedure, pass(y) :: mlt_v => z_cuda_mlt_v + procedure, pass(y) :: mlt_a => z_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => z_cuda_mlt_v_2 + procedure, pass(x) :: scal => z_cuda_scal + procedure, pass(x) :: nrm2 => z_cuda_nrm2 + procedure, pass(x) :: amax => z_cuda_amax + procedure, pass(x) :: asum => z_cuda_asum + procedure, pass(x) :: absval1 => z_cuda_absval1 + procedure, pass(x) :: absval2 => z_cuda_absval2 + + final :: z_cuda_vect_finalize #endif - end type psb_z_vect_gpu + end type psb_z_vect_cuda - public :: psb_z_vect_gpu_ + public :: psb_z_vect_cuda_ private :: constructor - interface psb_z_vect_gpu_ + interface psb_z_vect_cuda_ module procedure constructor - end interface psb_z_vect_gpu_ + end interface psb_z_vect_cuda_ contains function constructor(x) result(this) complex(psb_dpk_) :: x(:) - type(psb_z_vect_gpu) :: this + type(psb_z_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine z_gpu_device_wait() + subroutine z_cuda_device_wait() call psb_cudaSync() - end subroutine z_gpu_device_wait + end subroutine z_cuda_device_wait - subroutine z_gpu_new_buffer(n,x,info) + subroutine z_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine z_gpu_gthzv_x + end subroutine z_cuda_gthzv_x - subroutine z_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine z_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_z_vect_gpu) :: x + class(psb_z_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine z_gpu_gthzbuf + end subroutine z_cuda_gthzbuf - subroutine z_gpu_sctb(n,idx,x,beta,y) + subroutine z_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: beta, x(:) - class(psb_z_vect_gpu) :: y + class(psb_z_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_z_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine z_gpu_sctb + end subroutine z_cuda_sctb - subroutine z_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine z_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta, x(:) - class(psb_z_vect_gpu) :: y + class(psb_z_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine z_gpu_sctb_x + end subroutine z_cuda_sctb_x - subroutine z_gpu_sctb_buf(i,n,idx,beta,y) + subroutine z_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta - class(psb_z_vect_gpu) :: y + class(psb_z_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine z_gpu_sctb_buf + end subroutine z_cuda_sctb_buf - subroutine z_gpu_bld_x(x,this) + subroutine z_cuda_bld_x(x,this) use psb_base_mod complex(psb_dpk_), intent(in) :: this(:) - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_bld_x',& + call psb_errpush(info,'z_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine z_gpu_bld_x + end subroutine z_cuda_bld_x - subroutine z_gpu_bld_mn(x,n) + subroutine z_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'z_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'z_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine z_gpu_bld_mn + end subroutine z_cuda_bld_mn - subroutine z_gpu_set_host(x) + subroutine z_cuda_set_host(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine z_gpu_set_host + end subroutine z_cuda_set_host - subroutine z_gpu_set_dev(x) + subroutine z_cuda_set_dev(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine z_gpu_set_dev + end subroutine z_cuda_set_dev - subroutine z_gpu_set_sync(x) + subroutine z_cuda_set_sync(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine z_gpu_set_sync + end subroutine z_cuda_set_sync - function z_gpu_is_dev(x) result(res) + function z_cuda_is_dev(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function z_gpu_is_dev + end function z_cuda_is_dev - function z_gpu_is_host(x) result(res) + function z_cuda_is_host(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function z_gpu_is_host + end function z_cuda_is_host - function z_gpu_is_sync(x) result(res) + function z_cuda_is_sync(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function z_gpu_is_sync + end function z_cuda_is_sync - function z_gpu_get_nrows(x) result(res) + function z_cuda_get_nrows(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function z_gpu_get_nrows + end function z_cuda_get_nrows - function z_gpu_get_fmt() result(res) + function z_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'zGPU' - end function z_gpu_get_fmt + end function z_cuda_get_fmt - subroutine z_gpu_all(n, x, info) + subroutine z_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_z_vect_gpu), intent(out) :: x + class(psb_z_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_all',& + call psb_errpush(info,'z_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine z_gpu_all + end subroutine z_cuda_all - subroutine z_gpu_zero(x) + subroutine z_cuda_zero(x) use psi_serial_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=zzero call x%set_host() - end subroutine z_gpu_zero + end subroutine z_cuda_zero - subroutine z_gpu_asb_m(n, x, info) + subroutine z_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine z_gpu_asb_m + end subroutine z_cuda_asb_m - subroutine z_gpu_sync_space(x,info) + subroutine z_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine z_gpu_sync_space + end subroutine z_cuda_sync_space - subroutine z_gpu_sync(x) + subroutine z_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'z_gpu_sync') + call psb_errpush(info,'z_cuda_sync') end if - end subroutine z_gpu_sync + end subroutine z_cuda_sync - subroutine z_gpu_free(x, info) + subroutine z_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine z_gpu_free + end subroutine z_cuda_free - subroutine z_gpu_set_scal(x,val,first,last) - class(psb_z_vect_gpu), intent(inout) :: x + subroutine z_cuda_set_scal(x,val,first,last) + class(psb_z_vect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine z_gpu_set_scal + end subroutine z_cuda_set_scal !!$ -!!$ subroutine z_gpu_set_vect(x,val) -!!$ class(psb_z_vect_gpu), intent(inout) :: x +!!$ subroutine z_cuda_set_vect(x,val) +!!$ class(psb_z_vect_cuda), intent(inout) :: x !!$ complex(psb_dpk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_z_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine z_gpu_set_vect +!!$ end subroutine z_cuda_set_vect - function z_gpu_dot_v(n,x,y) result(res) + function z_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n complex(psb_dpk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_z_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'z_gpu_dot_v') + call psb_errpush(info,'z_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function z_gpu_dot_v + end function z_cuda_dot_v - function z_gpu_dot_a(n,x,y) result(res) + function z_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n complex(psb_dpk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function z_gpu_dot_a + end function z_cuda_dot_a - subroutine z_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine z_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y complex(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) ! Do something different here if ((beta /= zzero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine z_gpu_axpby_v + end subroutine z_cuda_axpby_v - subroutine z_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine z_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m complex(psb_dpk_), intent(in) :: x(:) - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y complex(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine z_gpu_axpby_a + end subroutine z_cuda_axpby_a - subroutine z_gpu_mlt_v(x, y, info) + subroutine z_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine z_gpu_mlt_v + end subroutine z_cuda_mlt_v - subroutine z_gpu_mlt_a(x, y, info) + subroutine z_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none complex(psb_dpk_), intent(in) :: x(:) - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_z_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine z_gpu_mlt_a + end subroutine z_cuda_mlt_a - subroutine z_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine z_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none complex(psb_dpk_), intent(in) :: alpha,beta complex(psb_dpk_), intent(in) :: x(:) complex(psb_dpk_), intent(in) :: y(:) - class(psb_z_vect_gpu), intent(inout) :: z + class(psb_z_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine z_gpu_mlt_a_2 + end subroutine z_cuda_mlt_a_2 - subroutine z_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine z_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none complex(psb_dpk_), intent(in) :: alpha,beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y - class(psb_z_vect_gpu), intent(inout) :: z + class(psb_z_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type (yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= zzero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine z_gpu_mlt_v_2 + end subroutine z_cuda_mlt_v_2 - subroutine z_gpu_scal(alpha, x) + subroutine z_cuda_scal(alpha, x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x complex(psb_dpk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine z_gpu_scal + end subroutine z_cuda_scal - function z_gpu_nrm2(n,x) result(res) + function z_cuda_nrm2(n,x) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) - end function z_gpu_nrm2 + end function z_cuda_nrm2 - function z_gpu_amax(n,x) result(res) + function z_cuda_amax(n,x) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) - end function z_gpu_amax + end function z_cuda_amax - function z_gpu_asum(n,x) result(res) + function z_cuda_asum(n,x) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDeviceComplex(res,n,x%deviceVect) - end function z_gpu_asum + end function z_cuda_asum - subroutine z_gpu_absval1(x) + subroutine z_cuda_absval1(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,zone,x%deviceVect) - end subroutine z_gpu_absval1 + end subroutine z_cuda_absval1 - subroutine z_gpu_absval2(x,y) + subroutine z_cuda_absval2(x,y) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_z_vect_gpu) + class is (psb_z_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,zone,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_z_base_vect_type%absval(y) end select - end subroutine z_gpu_absval2 + end subroutine z_cuda_absval2 - subroutine z_gpu_vect_finalize(x) + subroutine z_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_z_vect_gpu), intent(inout) :: x + type(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine z_gpu_vect_finalize + end subroutine z_cuda_vect_finalize - subroutine z_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine z_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_z_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_z_vect_gpu) + class is (psb_z_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceDoubleComplex(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine z_gpu_ins_v + end subroutine z_cuda_ins_v - subroutine z_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine z_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_dpk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_z_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine z_gpu_ins_a + end subroutine z_cuda_ins_a #endif -end module psb_z_gpu_vect_mod +end module psb_z_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_z_gpu_vect_mod -module psb_z_gpu_multivect_mod +module psb_z_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_z_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_z_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_z_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_base_multivect_type) :: psb_z_multivect_gpu + type, extends(psb_z_base_multivect_type) :: psb_z_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_z_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => z_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => z_gpu_multi_get_ncols - procedure, nopass :: get_fmt => z_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => z_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => z_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => z_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => z_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => z_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => z_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => z_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => z_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => z_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => z_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => z_gpu_multi_amax -!!$ procedure, pass(x) :: asum => z_gpu_multi_asum - procedure, pass(x) :: all => z_gpu_multi_all - procedure, pass(x) :: zero => z_gpu_multi_zero - procedure, pass(x) :: asb => z_gpu_multi_asb - procedure, pass(x) :: sync => z_gpu_multi_sync - procedure, pass(x) :: sync_space => z_gpu_multi_sync_space - procedure, pass(x) :: bld_x => z_gpu_multi_bld_x - procedure, pass(x) :: bld_n => z_gpu_multi_bld_n - procedure, pass(x) :: free => z_gpu_multi_free - procedure, pass(x) :: ins => z_gpu_multi_ins - procedure, pass(x) :: is_host => z_gpu_multi_is_host - procedure, pass(x) :: is_dev => z_gpu_multi_is_dev - procedure, pass(x) :: is_sync => z_gpu_multi_is_sync - procedure, pass(x) :: set_host => z_gpu_multi_set_host - procedure, pass(x) :: set_dev => z_gpu_multi_set_dev - procedure, pass(x) :: set_sync => z_gpu_multi_set_sync - procedure, pass(x) :: set_scal => z_gpu_multi_set_scal - procedure, pass(x) :: set_vect => z_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => z_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => z_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => z_gpu_multi_sctb_x - final :: z_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => z_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => z_cuda_multi_get_ncols + procedure, nopass :: get_fmt => z_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => z_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => z_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => z_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => z_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => z_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => z_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => z_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => z_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => z_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => z_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => z_cuda_multi_amax +!!$ procedure, pass(x) :: asum => z_cuda_multi_asum + procedure, pass(x) :: all => z_cuda_multi_all + procedure, pass(x) :: zero => z_cuda_multi_zero + procedure, pass(x) :: asb => z_cuda_multi_asb + procedure, pass(x) :: sync => z_cuda_multi_sync + procedure, pass(x) :: sync_space => z_cuda_multi_sync_space + procedure, pass(x) :: bld_x => z_cuda_multi_bld_x + procedure, pass(x) :: bld_n => z_cuda_multi_bld_n + procedure, pass(x) :: free => z_cuda_multi_free + procedure, pass(x) :: ins => z_cuda_multi_ins + procedure, pass(x) :: is_host => z_cuda_multi_is_host + procedure, pass(x) :: is_dev => z_cuda_multi_is_dev + procedure, pass(x) :: is_sync => z_cuda_multi_is_sync + procedure, pass(x) :: set_host => z_cuda_multi_set_host + procedure, pass(x) :: set_dev => z_cuda_multi_set_dev + procedure, pass(x) :: set_sync => z_cuda_multi_set_sync + procedure, pass(x) :: set_scal => z_cuda_multi_set_scal + procedure, pass(x) :: set_vect => z_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => z_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => z_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => z_cuda_multi_sctb_x + final :: z_cuda_multi_vect_finalize #endif - end type psb_z_multivect_gpu + end type psb_z_multivect_cuda - public :: psb_z_multivect_gpu + public :: psb_z_multivect_cuda private :: constructor - interface psb_z_multivect_gpu + interface psb_z_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) complex(psb_dpk_) :: x(:,:) - type(psb_z_multivect_gpu) :: this + type(psb_z_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine z_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_dpk_) :: y(:) -!!$ class(psb_z_multivect_gpu) :: x +!!$ class(psb_z_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine z_gpu_multi_gthzv_x +!!$ end subroutine z_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine z_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine z_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ complex(psb_dpk_) :: beta, x(:) -!!$ class(psb_z_multivect_gpu) :: y +!!$ class(psb_z_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_z_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine z_gpu_multi_sctb +!!$ end subroutine z_cuda_multi_sctb !!$ -!!$ subroutine z_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine z_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_dpk_) :: beta, x(:) -!!$ class(psb_z_multivect_gpu) :: y +!!$ class(psb_z_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine z_gpu_multi_sctb_x +!!$ end subroutine z_cuda_multi_sctb_x - subroutine z_gpu_multi_bld_x(x,this) + subroutine z_cuda_multi_bld_x(x,this) use psb_base_mod complex(psb_dpk_), intent(in) :: this(:,:) - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_multi_bld_x',& + call psb_errpush(info,'z_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine z_gpu_multi_bld_x + end subroutine z_cuda_multi_bld_x - subroutine z_gpu_multi_bld_n(x,m,n) + subroutine z_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'z_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'z_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine z_gpu_multi_bld_n + end subroutine z_cuda_multi_bld_n - subroutine z_gpu_multi_set_host(x) + subroutine z_cuda_multi_set_host(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine z_gpu_multi_set_host + end subroutine z_cuda_multi_set_host - subroutine z_gpu_multi_set_dev(x) + subroutine z_cuda_multi_set_dev(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine z_gpu_multi_set_dev + end subroutine z_cuda_multi_set_dev - subroutine z_gpu_multi_set_sync(x) + subroutine z_cuda_multi_set_sync(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine z_gpu_multi_set_sync + end subroutine z_cuda_multi_set_sync - function z_gpu_multi_is_dev(x) result(res) + function z_cuda_multi_is_dev(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function z_gpu_multi_is_dev + end function z_cuda_multi_is_dev - function z_gpu_multi_is_host(x) result(res) + function z_cuda_multi_is_host(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function z_gpu_multi_is_host + end function z_cuda_multi_is_host - function z_gpu_multi_is_sync(x) result(res) + function z_cuda_multi_is_sync(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function z_gpu_multi_is_sync + end function z_cuda_multi_is_sync - function z_gpu_multi_get_nrows(x) result(res) + function z_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function z_gpu_multi_get_nrows + end function z_cuda_multi_get_nrows - function z_gpu_multi_get_ncols(x) result(res) + function z_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function z_gpu_multi_get_ncols + end function z_cuda_multi_get_ncols - function z_gpu_multi_get_fmt() result(res) + function z_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'zGPU' - end function z_gpu_multi_get_fmt + end function z_cuda_multi_get_fmt -!!$ function z_gpu_multi_dot_v(n,x,y) result(res) +!!$ function z_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ class(psb_z_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_dpk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_z_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'z_gpu_multi_dot_v') +!!$ call psb_errpush(info,'z_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function z_gpu_multi_dot_v +!!$ end function z_cuda_multi_dot_v !!$ -!!$ function z_gpu_multi_dot_a(n,x,y) result(res) +!!$ function z_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ complex(psb_dpk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_dpk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function z_gpu_multi_dot_a +!!$ end function z_cuda_multi_dot_a !!$ -!!$ subroutine z_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine z_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_z_base_multivect_type), intent(inout) :: x -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ complex(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine z_gpu_multi_axpby_v +!!$ end subroutine z_cuda_multi_axpby_v !!$ -!!$ subroutine z_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine z_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ complex(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ complex(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine z_gpu_multi_axpby_a +!!$ end subroutine z_cuda_multi_axpby_a !!$ -!!$ subroutine z_gpu_multi_mlt_v(x, y, info) +!!$ subroutine z_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_z_base_multivect_type), intent(inout) :: x -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine z_gpu_multi_mlt_v +!!$ end subroutine z_cuda_multi_mlt_v !!$ -!!$ subroutine z_gpu_multi_mlt_a(x, y, info) +!!$ subroutine z_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_z_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine z_gpu_multi_mlt_a +!!$ end subroutine z_cuda_multi_mlt_a !!$ -!!$ subroutine z_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine z_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta !!$ complex(psb_dpk_), intent(in) :: x(:) !!$ complex(psb_dpk_), intent(in) :: y(:) -!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ class(psb_z_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine z_gpu_multi_mlt_a_2 +!!$ end subroutine z_cuda_multi_mlt_a_2 !!$ -!!$ subroutine z_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine z_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta !!$ class(psb_z_base_multivect_type), intent(inout) :: x !!$ class(psb_z_base_multivect_type), intent(inout) :: y -!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ class(psb_z_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine z_gpu_multi_mlt_v_2 +!!$ end subroutine z_cuda_multi_mlt_v_2 - subroutine z_gpu_multi_set_scal(x,val) - class(psb_z_multivect_gpu), intent(inout) :: x + subroutine z_cuda_multi_set_scal(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_z_base_multivect_type%set_scal(val) call x%set_host() - end subroutine z_gpu_multi_set_scal + end subroutine z_cuda_multi_set_scal - subroutine z_gpu_multi_set_vect(x,val) - class(psb_z_multivect_gpu), intent(inout) :: x + subroutine z_cuda_multi_set_vect(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_z_base_multivect_type%set_vect(val) call x%set_host() - end subroutine z_gpu_multi_set_vect + end subroutine z_cuda_multi_set_vect -!!$ subroutine z_gpu_multi_scal(alpha, x) +!!$ subroutine z_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ complex(psb_dpk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_z_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine z_gpu_multi_scal +!!$ end subroutine z_cuda_multi_scal !!$ !!$ -!!$ function z_gpu_multi_nrm2(n,x) result(res) +!!$ function z_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function z_gpu_multi_nrm2 +!!$ end function z_cuda_multi_nrm2 !!$ -!!$ function z_gpu_multi_amax(n,x) result(res) +!!$ function z_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function z_gpu_multi_amax +!!$ end function z_cuda_multi_amax !!$ -!!$ function z_gpu_multi_asum(n,x) result(res) +!!$ function z_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function z_gpu_multi_asum +!!$ end function z_cuda_multi_asum - subroutine z_gpu_multi_all(m,n, x, info) + subroutine z_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_multivect_gpu), intent(out) :: x + class(psb_z_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=zzero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_multi_all',& + call psb_errpush(info,'z_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine z_gpu_multi_all + end subroutine z_cuda_multi_all - subroutine z_gpu_multi_zero(x) + subroutine z_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine z_gpu_multi_zero + end subroutine z_cuda_multi_zero - subroutine z_gpu_multi_asb(m,n, x, info) + subroutine z_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine z_gpu_multi_asb + end subroutine z_cuda_multi_asb - subroutine z_gpu_multi_sync_space(x,info) + subroutine z_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine z_gpu_multi_sync_space + end subroutine z_cuda_multi_sync_space - subroutine z_gpu_multi_sync(x) + subroutine z_cuda_multi_sync(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'z_gpu_multi_sync') + call psb_errpush(info,'z_cuda_multi_sync') end if - end subroutine z_gpu_multi_sync + end subroutine z_cuda_multi_sync - subroutine z_gpu_multi_free(x, info) + subroutine z_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine z_gpu_multi_free + end subroutine z_cuda_multi_free - subroutine z_gpu_multi_vect_finalize(x) + subroutine z_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_z_multivect_gpu), intent(inout) :: x + type(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine z_gpu_multi_vect_finalize + end subroutine z_cuda_multi_vect_finalize - subroutine z_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine z_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_dpk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_z_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine z_gpu_multi_ins + end subroutine z_cuda_multi_ins #endif -end module psb_z_gpu_multivect_mod +end module psb_z_cuda_multivect_mod diff --git a/test/gpukern/Makefile b/test/cudakern/Makefile similarity index 100% rename from test/gpukern/Makefile rename to test/cudakern/Makefile diff --git a/test/gpukern/c_file_spmv.F90 b/test/cudakern/c_file_spmv.F90 similarity index 100% rename from test/gpukern/c_file_spmv.F90 rename to test/cudakern/c_file_spmv.F90 diff --git a/test/gpukern/d_file_spmv.F90 b/test/cudakern/d_file_spmv.F90 similarity index 100% rename from test/gpukern/d_file_spmv.F90 rename to test/cudakern/d_file_spmv.F90 diff --git a/test/gpukern/data_input.f90 b/test/cudakern/data_input.f90 similarity index 100% rename from test/gpukern/data_input.f90 rename to test/cudakern/data_input.f90 diff --git a/test/gpukern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 similarity index 99% rename from test/gpukern/dpdegenmv.F90 rename to test/cudakern/dpdegenmv.F90 index ab616471..b8a2ba2c 100644 --- a/test/gpukern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -548,7 +548,7 @@ program pdgenmv use psb_util_mod use psb_ext_mod #ifdef HAVE_GPU - use psb_gpu_mod + use psb_cuda_mod #endif #ifdef HAVE_RSB use psb_rsb_mod @@ -619,7 +619,7 @@ program pdgenmv call psb_info(ctxt,iam,np) #ifdef HAVE_GPU - call psb_gpu_init(ctxt) + call psb_cuda_init(ctxt) #endif #ifdef HAVE_RSB call psb_rsb_init() @@ -641,7 +641,7 @@ program pdgenmv end if #ifdef HAVE_GPU write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif ! ! get parameters @@ -761,7 +761,7 @@ program pdgenmv call psb_barrier(ctxt) t1 = psb_wtime() call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() t2 = psb_Wtime() -t1 call psb_amx(ctxt,t2) if (j==1) tcnvg1 = t2 @@ -798,7 +798,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) tt2 = psb_wtime() - tt1 call psb_amx(ctxt,tt2) @@ -825,7 +825,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) gt2 = psb_wtime() - gt1 call psb_amx(ctxt,gt2) @@ -928,7 +928,7 @@ program pdgenmv #ifdef HAVE_GPU bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth - bdwdth = psb_gpu_MemoryPeakBandwidth() + bdwdth = psb_cuda_MemoryPeakBandwidth() write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth #endif write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() @@ -950,7 +950,7 @@ program pdgenmv goto 9999 end if #ifdef HAVE_GPU - call psb_gpu_exit() + call psb_cuda_exit() #endif call psb_exit(ctxt) stop diff --git a/test/gpukern/s_file_spmv.F90 b/test/cudakern/s_file_spmv.F90 similarity index 100% rename from test/gpukern/s_file_spmv.F90 rename to test/cudakern/s_file_spmv.F90 diff --git a/test/gpukern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90 similarity index 100% rename from test/gpukern/spdegenmv.F90 rename to test/cudakern/spdegenmv.F90 diff --git a/test/gpukern/z_file_spmv.F90 b/test/cudakern/z_file_spmv.F90 similarity index 100% rename from test/gpukern/z_file_spmv.F90 rename to test/cudakern/z_file_spmv.F90 From 9b713c177bf90424c634c7c29c5a773835629459 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 28 Nov 2023 19:46:20 +0100 Subject: [PATCH 013/110] Fix cuda interfaces for renaming --- cuda/cuda_util.c | 46 ++++++++++++++++++------------------- cuda/cuda_util.h | 18 +++++++-------- cuda/cvectordev.c | 28 +++++++++++----------- cuda/diagdev.c | 4 ++-- cuda/dnsdev.c | 8 +++---- cuda/dvectordev.c | 28 +++++++++++----------- cuda/elldev.c | 24 +++++++++---------- cuda/hdiagdev.c | 4 ++-- cuda/hlldev.c | 16 ++++++------- cuda/ivectordev.c | 8 +++---- cuda/svectordev.c | 28 +++++++++++----------- cuda/zvectordev.c | 28 +++++++++++----------- test/cudakern/dpdegenmv.F90 | 16 ++++++------- 13 files changed, 128 insertions(+), 128 deletions(-) diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 63c38b53..0fe4a8b7 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -37,7 +37,7 @@ static int hasUVA=-1; static struct cudaDeviceProp *prop=NULL; -static spgpuHandle_t psb_gpu_handle = NULL; +static spgpuHandle_t psb_cuda_handle = NULL; static cublasHandle_t psb_cublas_handle = NULL; @@ -228,7 +228,7 @@ int gpuInit(int dev) return SPGPU_UNSPECIFIED; } if (!psb_cublas_handle) - psb_gpuCreateCublasHandle(); + psb_cudaCreateCublasHandle(); hasUVA=getDeviceHasUVA(); return err; @@ -238,14 +238,14 @@ int gpuInit(int dev) void gpuClose() { cudaStream_t st1, st2; - if (! psb_gpu_handle) - st1=spgpuGetStream(psb_gpu_handle); + if (! psb_cuda_handle) + st1=spgpuGetStream(psb_cuda_handle); if (! psb_cublas_handle) cublasGetStream(psb_cublas_handle,&st2); - psb_gpuDestroyHandle(); + psb_cudaDestroyHandle(); if (st1 != st2) - psb_gpuDestroyCublasHandle(); + psb_cudaDestroyCublasHandle(); free(prop); prop=NULL; hasUVA=-1; @@ -391,49 +391,49 @@ void cudaReset() } -spgpuHandle_t psb_gpuGetHandle() +spgpuHandle_t psb_cudaGetHandle() { - return psb_gpu_handle; + return psb_cuda_handle; } -void psb_gpuCreateHandle() +void psb_cudaCreateHandle() { - if (!psb_gpu_handle) - spgpuCreate(&psb_gpu_handle, getDevice()); + if (!psb_cuda_handle) + spgpuCreate(&psb_cuda_handle, getDevice()); } -void psb_gpuDestroyHandle() +void psb_cudaDestroyHandle() { - if (!psb_gpu_handle) - spgpuDestroy(psb_gpu_handle); - psb_gpu_handle = NULL; + if (!psb_cuda_handle) + spgpuDestroy(psb_cuda_handle); + psb_cuda_handle = NULL; } -cudaStream_t psb_gpuGetStream() +cudaStream_t psb_cudaGetStream() { - return spgpuGetStream(psb_gpu_handle); + return spgpuGetStream(psb_cuda_handle); } -void psb_gpuSetStream(cudaStream_t stream) +void psb_cudaSetStream(cudaStream_t stream) { - spgpuSetStream(psb_gpu_handle, stream); + spgpuSetStream(psb_cuda_handle, stream); return ; } -cublasHandle_t psb_gpuGetCublasHandle() +cublasHandle_t psb_cudaGetCublasHandle() { if (!psb_cublas_handle) - psb_gpuCreateCublasHandle(); + psb_cudaCreateCublasHandle(); return psb_cublas_handle; } -void psb_gpuCreateCublasHandle() +void psb_cudaCreateCublasHandle() { if (!psb_cublas_handle) cublasCreate(&psb_cublas_handle); } -void psb_gpuDestroyCublasHandle() +void psb_cudaDestroyCublasHandle() { if (!psb_cublas_handle) cublasDestroy(psb_cublas_handle); diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h index 03c7b488..789c08f4 100644 --- a/cuda/cuda_util.h +++ b/cuda/cuda_util.h @@ -71,15 +71,15 @@ void cudaReset(); void gpuClose(); -spgpuHandle_t psb_gpuGetHandle(); -void psb_gpuCreateHandle(); -void psb_gpuDestroyHandle(); -cudaStream_t psb_gpuGetStream(); -void psb_gpuSetStream(cudaStream_t stream); - -cublasHandle_t psb_gpuGetCublasHandle(); -void psb_gpuCreateCublasHandle(); -void psb_gpuDestroyCublasHandle(); +spgpuHandle_t psb_cudaGetHandle(); +void psb_cudaCreateHandle(); +void psb_cudaDestroyHandle(); +cudaStream_t psb_cudaGetStream(); +void psb_cudaSetStream(cudaStream_t stream); + +cublasHandle_t psb_cudaGetCublasHandle(); +void psb_cudaCreateCublasHandle(); +void psb_cudaDestroyCublasHandle(); int allocateInt(void **, int); diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index db55caef..1dc16667 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -89,7 +89,7 @@ int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_); @@ -104,7 +104,7 @@ int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiV struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -144,7 +144,7 @@ int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; cuFloatComplex *hv = (cuFloatComplex *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -175,7 +175,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, cuFloatComplex *hv = (cuFloatComplex *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -187,7 +187,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_, @@ -197,7 +197,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_, @@ -207,7 +207,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_, @@ -218,7 +218,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_, @@ -231,7 +231,7 @@ int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_, (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); @@ -244,7 +244,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -261,7 +261,7 @@ int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -279,7 +279,7 @@ int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVec struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -297,7 +297,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -311,7 +311,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/cuda/diagdev.c b/cuda/diagdev.c index 64879455..a2acf1f4 100644 --- a/cuda/diagdev.c +++ b/cuda/diagdev.c @@ -186,7 +186,7 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); #ifdef HAVE_SPGPU #ifdef VERBOSE @@ -268,7 +268,7 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); #ifdef HAVE_SPGPU #ifdef VERBOSE diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c index fb4d339c..25cddc87 100644 --- a/cuda/dnsdev.c +++ b/cuda/dnsdev.c @@ -126,7 +126,7 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { @@ -157,7 +157,7 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { @@ -188,7 +188,7 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { @@ -219,7 +219,7 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 8b020c16..eae82c1e 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -88,7 +88,7 @@ int setscalMultiVecDeviceDouble(double val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_); @@ -104,7 +104,7 @@ int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal, struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -143,7 +143,7 @@ int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; double *hv = (double *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -168,7 +168,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v double *hv = (double *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -179,7 +179,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, @@ -189,7 +189,7 @@ int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -198,7 +198,7 @@ int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -207,7 +207,7 @@ int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -219,7 +219,7 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_); return(i); @@ -231,7 +231,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -246,7 +246,7 @@ int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceV { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -262,7 +262,7 @@ int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -278,7 +278,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -291,7 +291,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/cuda/elldev.c b/cuda/elldev.c index 8fd7aeb5..eff89efa 100644 --- a/cuda/elldev.c +++ b/cuda/elldev.c @@ -158,7 +158,7 @@ void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, i int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); for (i=0; irows+1)*sizeof(int)); //cudaSync(); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, @@ -502,7 +502,7 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is //fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i); //cudaSync(); //fprintf(stderr," hacksz: %d \n",hacksz); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, @@ -545,7 +545,7 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); //cudaSync(); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, @@ -588,7 +588,7 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); //cudaSync(); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, diff --git a/cuda/ivectordev.c b/cuda/ivectordev.c index 93636465..71d5c472 100644 --- a/cuda/ivectordev.c +++ b/cuda/ivectordev.c @@ -90,7 +90,7 @@ int setscalMultiVecDeviceInt(int val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_); @@ -105,7 +105,7 @@ int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal, struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -144,7 +144,7 @@ int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; int *hv = (int *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -169,7 +169,7 @@ int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void int *hv = (int *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); diff --git a/cuda/svectordev.c b/cuda/svectordev.c index d193a4d8..9a41ae1a 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -88,7 +88,7 @@ int setscalMultiVecDeviceFloat(float val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_); @@ -103,7 +103,7 @@ int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal, struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -142,7 +142,7 @@ int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; float *hv = (float *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -167,7 +167,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo float *hv = (float *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -179,7 +179,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -188,7 +188,7 @@ int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -197,7 +197,7 @@ int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -207,7 +207,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, @@ -219,7 +219,7 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_); return(i); @@ -231,7 +231,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -246,7 +246,7 @@ int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVec { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -262,7 +262,7 @@ int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -278,7 +278,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -291,7 +291,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index c245719f..c3671a86 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -89,7 +89,7 @@ int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_); @@ -104,7 +104,7 @@ int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMulti struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -144,7 +144,7 @@ int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; cuDoubleComplex *hv = (cuDoubleComplex *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -174,7 +174,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, cuDoubleComplex *hv = (cuDoubleComplex *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -186,7 +186,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -195,7 +195,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, @@ -205,7 +205,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, @@ -216,7 +216,7 @@ int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, @@ -228,7 +228,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_, (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); @@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMulti int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -258,7 +258,7 @@ int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -275,7 +275,7 @@ int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceV struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -293,7 +293,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -307,7 +307,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index b8a2ba2c..d2cc2172 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -573,8 +573,8 @@ program pdgenmv ! dense matrices type(psb_d_vect_type), target :: xv, bv, xg, bg #ifdef HAVE_GPU - type(psb_d_vect_gpu) :: vmold - type(psb_i_vect_gpu) :: imold + type(psb_d_vect_cuda) :: vmold + type(psb_i_vect_cuda) :: imold #endif real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:) ! blacs parameters @@ -595,14 +595,14 @@ program pdgenmv type(psb_d_rsb_sparse_mat), target :: arsb #endif #ifdef HAVE_GPU - type(psb_d_elg_sparse_mat), target :: aelg - type(psb_d_csrg_sparse_mat), target :: acsrg + type(psb_d_cuda_elg_sparse_mat), target :: aelg + type(psb_d_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 - type(psb_d_hybg_sparse_mat), target :: ahybg + type(psb_d_cuda_hybg_sparse_mat), target :: ahybg #endif - type(psb_d_hlg_sparse_mat), target :: ahlg - type(psb_d_hdiag_sparse_mat), target :: ahdiag - type(psb_d_dnsg_sparse_mat), target :: adnsg + type(psb_d_cuda_hlg_sparse_mat), target :: ahlg + type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_d_cuda_dnsg_sparse_mat), target :: adnsg #endif class(psb_d_base_sparse_mat), pointer :: agmold, acmold ! other variables From 655c86caeda10756c110adf8863280bfab85da63 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 10:20:38 +0100 Subject: [PATCH 014/110] Updated docs. --- README.md | 16 ++++++++++++++- cuda/License-spgpu.md | 21 ++++++++++++++++++++ test/cudakern/dpdegenmv.F90 | 4 ++-- test/cudakern/spdegenmv.F90 | 39 ++++++++++++++++++++----------------- 4 files changed, 59 insertions(+), 21 deletions(-) create mode 100644 cuda/License-spgpu.md diff --git a/README.md b/README.md index a9813f5e..afab1646 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -PSBLAS library, version 3.8 +PSBLAS library, version 3.9 =========================== The architecture of the Fortran 2003 sparse BLAS is described in: @@ -40,6 +40,15 @@ The main reference for the serial sparse BLAS is: >linear algebra subprograms for sparse matrices: a user level interface, >ACM Trans. Math. Softw., 23(3), 379-401, 1997. +CUDA and GPU support +-------------------- +This version of PSBLAS incorporates into a single package three +entities that were previouslty separated: +1. PSBLAS -- the base library +2. PSBLAS-EXT -- a library providing additional storage formats +3. SPGPU -- a package of kernels for NVIDIA GPUs originally + written by Davide Barbieri and Salvatore Filippone; + see the license file cuda/License-spgpu.md INSTALLING ---------- @@ -61,6 +70,11 @@ prerequisites (see also SERIAL below): specify `--with-amd` (see `./configure --help` for more details). We use the C interface to AMD. +5. If you have CUDA available, use + --with-cuda= to specify the CUDA toolkit location + --with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute + capabilities) to compile the CUDA code for. + The configure script will generate a Make.inc file suitable for building the library. The script is capable of recognizing the needed libraries with their default names; if they are in unusual places consider adding diff --git a/cuda/License-spgpu.md b/cuda/License-spgpu.md new file mode 100644 index 00000000..7f4b8ff4 --- /dev/null +++ b/cuda/License-spgpu.md @@ -0,0 +1,21 @@ +(c) Copyright 2011-2021 Davide Barbieri, Salvatore Filippone + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index d2cc2172..bde57f5f 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -631,7 +631,7 @@ program pdgenmv stop endif if(psb_get_errstatus() /= 0) goto 9999 - name='pdegenmv-gpu' + name='pdegenmv-cuda' ! ! Hello world ! @@ -974,7 +974,7 @@ contains if (iam == 0) then write(*,*) 'CPU side format?' read(psb_inp_unit,*) acfmt - write(*,*) 'GPU side format?' + write(*,*) 'CUDA side format?' read(psb_inp_unit,*) agfmt write(*,*) 'Size of discretization cube?' read(psb_inp_unit,*) idim diff --git a/test/cudakern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90 index 1c7d646f..9644d8c7 100644 --- a/test/cudakern/spdegenmv.F90 +++ b/test/cudakern/spdegenmv.F90 @@ -548,7 +548,7 @@ program pdgenmv use psb_util_mod use psb_ext_mod #ifdef HAVE_GPU - use psb_gpu_mod + use psb_cuda_mod #endif use psb_s_pde3d_mod implicit none @@ -570,8 +570,8 @@ program pdgenmv ! dense matrices type(psb_s_vect_type), target :: xv,bv, xg, bg #ifdef HAVE_GPU - type(psb_s_vect_gpu) :: vmold - type(psb_i_vect_gpu) :: imold + type(psb_s_vect_cuda) :: vmold + type(psb_i_vect_cuda) :: imold #endif real(psb_spk_), allocatable :: x1(:), x2(:), x0(:) ! blacs parameters @@ -589,14 +589,14 @@ program pdgenmv type(psb_s_dia_sparse_mat), target :: adia type(psb_s_hdia_sparse_mat), target :: ahdia #ifdef HAVE_GPU - type(psb_s_elg_sparse_mat), target :: aelg - type(psb_s_csrg_sparse_mat), target :: acsrg + type(psb_s_cuda_elg_sparse_mat), target :: aelg + type(psb_s_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 - type(psb_s_hybg_sparse_mat), target :: ahybg + type(psb_s_cuda_hybg_sparse_mat), target :: ahybg #endif - type(psb_s_hlg_sparse_mat), target :: ahlg - type(psb_s_dnsg_sparse_mat), target :: adnsg - type(psb_s_hdiag_sparse_mat), target :: ahdiag + type(psb_s_cuda_hlg_sparse_mat), target :: ahlg + type(psb_s_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_s_cuda_dnsg_sparse_mat), target :: adnsg #endif class(psb_s_base_sparse_mat), pointer :: agmold, acmold ! other variables @@ -613,7 +613,10 @@ program pdgenmv call psb_info(ctxt,iam,np) #ifdef HAVE_GPU - call psb_gpu_init(ctxt) + call psb_cuda_init(ctxt) +#endif +#ifdef HAVE_RSB + call psb_rsb_init() #endif if (iam < 0) then @@ -622,7 +625,7 @@ program pdgenmv stop endif if(psb_get_errstatus() /= 0) goto 9999 - name='pdegenmv-gpu' + name='pdegenmv-cuda' ! ! Hello world ! @@ -632,7 +635,7 @@ program pdgenmv end if #ifdef HAVE_GPU write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif ! ! get parameters @@ -752,7 +755,7 @@ program pdgenmv call psb_barrier(ctxt) t1 = psb_wtime() call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() t2 = psb_Wtime() -t1 call psb_amx(ctxt,t2) if (j==1) tcnvg1 = t2 @@ -789,7 +792,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) tt2 = psb_wtime() - tt1 call psb_amx(ctxt,tt2) @@ -817,7 +820,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) gt2 = psb_wtime() - gt1 call psb_amx(ctxt,gt2) @@ -919,7 +922,7 @@ program pdgenmv #ifdef HAVE_GPU bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth - bdwdth = psb_gpu_MemoryPeakBandwidth() + bdwdth = psb_cuda_MemoryPeakBandwidth() write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth #endif write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() @@ -941,7 +944,7 @@ program pdgenmv goto 9999 end if #ifdef HAVE_GPU - call psb_gpu_exit() + call psb_cuda_exit() #endif call psb_exit(ctxt) stop @@ -965,7 +968,7 @@ contains if (iam == 0) then write(*,*) 'CPU side format?' read(psb_inp_unit,*) acfmt - write(*,*) 'GPU side format?' + write(*,*) 'CUDA side format?' read(psb_inp_unit,*) agfmt write(*,*) 'Size of discretization cube?' read(psb_inp_unit,*) idim From d3b2b7816da16a01f97c01f7d2bd1b4b07ab32a9 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 15:35:55 +0100 Subject: [PATCH 015/110] Fix coo insert OpenMP. Fix Make.inc.in --- Make.inc.in | 3 --- base/serial/impl/psb_c_coo_impl.F90 | 6 ++++++ base/serial/impl/psb_d_coo_impl.F90 | 6 ++++++ base/serial/impl/psb_s_coo_impl.F90 | 6 ++++++ base/serial/impl/psb_z_coo_impl.F90 | 6 ++++++ 5 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Make.inc.in b/Make.inc.in index ca0fa7f7..6a4b378d 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -71,11 +71,8 @@ CUDAD=@CUDAD@ CUDALD=@CUDALD@ LCUDA=@LCUDA@ -#SPGPUDIR=@SPGPU_DIR@ -#SPGPU_INCDIR=@SPGPU_INCDIR@ SPGPU_LIBS=@SPGPU_LIBS@ SPGPU_DEFINES=@SPGPU_DEFINES@ -#SPGPU_INCLUDES=@SPGPU_INCLUDES@ CUDA_DIR=@CUDA_DIR@ CUDA_DEFINES=@CUDA_DEFINES@ diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 46391dee..b1d71321 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index c2babf8e..350085bb 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 402c608a..51858efd 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 542f842e..0624dd21 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then From 6c9ca58282ce019a0d0eb29d02dfcb98c1cd0ce7 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 16:14:30 +0100 Subject: [PATCH 016/110] Silly bug in coo insert --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index b1d71321..5c90e287 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 350085bb..f6a173d1 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 51858efd..4c12d8fc 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 0624dd21..44ee89b5 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) From ab8631439f7f2e4f8afacfb42cbe53c5fe69eec2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 16:35:42 +0100 Subject: [PATCH 017/110] Update configure script --- config/pac.m4 | 2 +- configure | 36 +----------------------------------- configure.ac | 35 +++++++++++++++++------------------ 3 files changed, 19 insertions(+), 54 deletions(-) diff --git a/config/pac.m4 b/config/pac.m4 index 7a9ee07e..e4065402 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2160,7 +2160,7 @@ AC_DEFUN([PAC_ARG_WITH_CUDACC], [ AC_ARG_WITH(cudacc, AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, for example, - --with-cudacc=30,35,37,50,60]), + --with-cudacc=50,60,70,75]), [pac_cv_cudacc=$withval], [pac_cv_cudacc='']) ]) diff --git a/configure b/configure index 8b217231..b98c6ca0 100755 --- a/configure +++ b/configure @@ -830,7 +830,6 @@ enable_openmp with_blas with_blasdir with_lapack -with_rsb with_metis with_metisincfile with_metisdir @@ -1522,12 +1521,6 @@ Optional Packages: --with-blas= use BLAS library --with-blasdir= search for BLAS library in --with-lapack= use LAPACK library - --with-rsb Specify Recursive Sparse BLAS library linkage info - (that is, the output of librsb-config --static - --ldflags, or a directory where the usual - bin/include/lib subdirs with a regular RSB - installation resides, or nothing to make the - configure script invoke librsb-config) --with-metis=LIBNAME Specify the library name for METIS library. Default: "-lmetis" --with-metisincfile=DIR Specify the name for METIS include file. @@ -1542,7 +1535,7 @@ Optional Packages: --with-amdlibdir=DIR Specify the directory for AMD library. --with-cuda=DIR Specify the directory for CUDA library and includes. --with-cudacc A comma-separated list of CCs to compile to, for - example, --with-cudacc=30,35,37,50,60 + example, --with-cudacc=50,60,70,75 Some influential environment variables: FC Fortran compiler command @@ -3301,7 +3294,6 @@ psblas_cv_version="3.8.1" Be sure to specify the library paths of your interest. Examples: ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS - --with-spgpu=/path/to/spgpu FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -3316,7 +3308,6 @@ printf "%s\n" "$as_me: Be sure to specify the library paths of your interest. Examples: ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS - --with-spgpu=/path/to/spgpu FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -10021,31 +10012,6 @@ fi #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) -# Check whether --with-rsb was given. -if test ${with_rsb+y} -then : - withval=$with_rsb; if test x"$withval" = xno; then -want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi -else $as_nop - want_rsb_libs="" -fi - -if test x"$want_rsb_libs" != x ; then - if test x"$want_rsb_libs" = xyes ; then - want_rsb_libs="`librsb-config --static --ldflags`" - else - if test -d "$want_rsb_libs" ; then - want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" - else - true; - # we assume want_rsb_libs are linkage parameters - fi - fi - FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" -fi -RSB_LIBS="$want_rsb_libs" -LIBS="$RSB_LIBS ${LIBS}" - # Check whether --with-metis was given. if test ${with_metis+y} diff --git a/configure.ac b/configure.ac index bfc9b5f9..add09a79 100755 --- a/configure.ac +++ b/configure.ac @@ -57,7 +57,6 @@ AC_MSG_NOTICE([ Be sure to specify the library paths of your interest. Examples: ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS - [ --with-spgpu=/path/to/spgpu] FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -730,23 +729,23 @@ PAC_MAKE_IS_GNUMAKE # Note : also umfdi_local_search, ... #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) -AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then -want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""]) -if test x"$want_rsb_libs" != x ; then - if test x"$want_rsb_libs" = xyes ; then - want_rsb_libs="`librsb-config --static --ldflags`" - else - if test -d "$want_rsb_libs" ; then - want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" - else - true; - # we assume want_rsb_libs are linkage parameters - fi - fi - FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" -fi -RSB_LIBS="$want_rsb_libs" -LIBS="$RSB_LIBS ${LIBS}" +dnl AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then +dnl want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""]) +dnl if test x"$want_rsb_libs" != x ; then +dnl if test x"$want_rsb_libs" = xyes ; then +dnl want_rsb_libs="`librsb-config --static --ldflags`" +dnl else +dnl if test -d "$want_rsb_libs" ; then +dnl want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" +dnl else +dnl true; +dnl # we assume want_rsb_libs are linkage parameters +dnl fi +dnl fi +dnl FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" +dnl fi +dnl RSB_LIBS="$want_rsb_libs" +dnl LIBS="$RSB_LIBS ${LIBS}" dnl AC_CHECK_HEADERS([rsb.h], [ LIBS="${LIBS} $want_rsb_libs"], []) PAC_CHECK_METIS From a6016f00fa3f0d8c34deed848ab735e68ec3b35e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 1 Dec 2023 15:10:13 +0100 Subject: [PATCH 018/110] Bump PSBLAS version to 3.9 --- base/modules/psb_const_mod.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/modules/psb_const_mod.F90 b/base/modules/psb_const_mod.F90 index 56134474..70631932 100644 --- a/base/modules/psb_const_mod.F90 +++ b/base/modules/psb_const_mod.F90 @@ -136,9 +136,9 @@ module psb_const_mod ! ! Version ! - character(len=*), parameter :: psb_version_string_ = "3.8.0" + character(len=*), parameter :: psb_version_string_ = "3.9.0" integer(psb_ipk_), parameter :: psb_version_major_ = 3 - integer(psb_ipk_), parameter :: psb_version_minor_ = 8 + integer(psb_ipk_), parameter :: psb_version_minor_ = 9 integer(psb_ipk_), parameter :: psb_patchlevel_ = 0 ! From e373ed7e0bcdce1f75bcd1ea8d32acfa91dab7ab Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 1 Dec 2023 15:10:39 +0100 Subject: [PATCH 019/110] Modify configry to only use HAVE_CUDA, since SPGU is recompiled. --- Make.inc.in | 1 - config/pac.m4 | 2 +- configure | 59 +++++++++++++++++++++++++++------------------------ configure.ac | 59 ++++++++++++++++++++++++++------------------------- 4 files changed, 62 insertions(+), 59 deletions(-) diff --git a/Make.inc.in b/Make.inc.in index 6a4b378d..858ad336 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -72,7 +72,6 @@ CUDALD=@CUDALD@ LCUDA=@LCUDA@ SPGPU_LIBS=@SPGPU_LIBS@ -SPGPU_DEFINES=@SPGPU_DEFINES@ CUDA_DIR=@CUDA_DIR@ CUDA_DEFINES=@CUDA_DEFINES@ diff --git a/config/pac.m4 b/config/pac.m4 index e4065402..52185439 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2104,7 +2104,7 @@ dnl dnl @author Salvatore Filippone dnl AC_DEFUN(PAC_CHECK_CUDA, -[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the directory for CUDA library and includes.]), +[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]), [pac_cv_cuda_dir=$withval], [pac_cv_cuda_dir='']) diff --git a/configure b/configure index b98c6ca0..48487f98 100755 --- a/configure +++ b/configure @@ -665,7 +665,6 @@ CUDA_INCLUDES CUDA_DEFINES CUDA_DIR EXTRALDLIBS -SPGPU_DEFINES SPGPU_LIBS SPGPU_FLAGS METISINCFILE @@ -1533,7 +1532,7 @@ Optional Packages: --with-amddir=DIR Specify the directory for AMD library and includes. --with-amdincdir=DIR Specify the directory for AMD includes. --with-amdlibdir=DIR Specify the directory for AMD library. - --with-cuda=DIR Specify the directory for CUDA library and includes. + --with-cuda=DIR Specify the CUDA install directory. --with-cudacc A comma-separated list of CCs to compile to, for example, --with-cudacc=50,60,70,75 @@ -10678,7 +10677,7 @@ CPPFLAGS="$SAVE_CPPFLAGS" if test "x$pac_cv_have_cuda" == "xyes"; then -ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' @@ -10809,17 +10808,15 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu LIBS="$SAVE_LIBS" CPPFLAGS="$SAVE_CPPFLAGS" -CUDA_VERSION="$pac_cv_cuda_version"; -CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); -if test "x$pac_cv_have_cuda" == "xyes" ; then - SPGPU_DEFINES="-DHAVE_SPGPU -DHAVE_GPU"; + CUDA_VERSION="$pac_cv_cuda_version"; + CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); + HAVE_CUDA="yes"; SPGPU_LIBS="-lspgpu"; CUDAD=cudad; CUDALD=cudald; LCUDA="-lpsb_cuda"; EXTRALDLIBS="-lstdc++"; -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $CUDAD $CUDALD" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $CUDAD $CUDALD" >&5 printf "%s\n" "$as_me: At this point GPUTARGET is $CUDAD $CUDALD" >&6;} @@ -10833,26 +10830,27 @@ else $as_nop fi -if test "x$pac_cv_cudacc" == "x"; then - pac_cv_cudacc="30,35,37,50,60"; -fi -CUDEFINES="--dopt=on"; -for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` -do - CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; -done -if test "x$pac_cv_cuda_version" != "xunknown"; then - CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" -fi - + if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="50,60,70,75"; + CUDA_CC="$pac_cv_cudacc"; + fi + CUDEFINES="--dopt=on"; + for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` + do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; + done + if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + fi fi if test "x$pac_cv_ipk_size" != "x4"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5 printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;} - SPGPU_DEFINES=""; + HAVE_CUDA="no"; + CUDA_CC=""; SPGPU_LIBS=""; CUDAD=""; CUDALD=""; @@ -10921,9 +10919,9 @@ UTILLIBNAME=libpsb_util.a PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) -CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) -FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -10957,7 +10955,6 @@ FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) - ############################################################################### @@ -12290,6 +12287,9 @@ fi FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${HAVE_CUDA} + CUDA_CC : ${CUDA_CC} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} @@ -12320,6 +12320,9 @@ printf "%s\n" "$as_me: FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${HAVE_CUDA} + CUDA_CC : ${CUDA_CC} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} diff --git a/configure.ac b/configure.ac index add09a79..4b76485b 100755 --- a/configure.ac +++ b/configure.ac @@ -793,41 +793,40 @@ fi PAC_CHECK_CUDA() if test "x$pac_cv_have_cuda" == "xyes"; then - -PAC_CHECK_CUDA_VERSION() -CUDA_VERSION="$pac_cv_cuda_version"; -CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); -dnl PAC_CHECK_SPGPU() -if test "x$pac_cv_have_cuda" == "xyes" ; then - SPGPU_DEFINES="-DHAVE_SPGPU -DHAVE_GPU"; + + PAC_CHECK_CUDA_VERSION() + CUDA_VERSION="$pac_cv_cuda_version"; + CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); + dnl PAC_CHECK_SPGPU() + HAVE_CUDA="yes"; SPGPU_LIBS="-lspgpu"; CUDAD=cudad; CUDALD=cudald; LCUDA="-lpsb_cuda"; EXTRALDLIBS="-lstdc++"; -fi -AC_MSG_NOTICE([At this point GPUTARGET is $CUDAD $CUDALD]) - -PAC_ARG_WITH_CUDACC() -if test "x$pac_cv_cudacc" == "x"; then - pac_cv_cudacc="30,35,37,50,60"; -fi -CUDEFINES="--dopt=on"; -for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` -do - CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; -done -if test "x$pac_cv_cuda_version" != "xunknown"; then - CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" -fi + AC_MSG_NOTICE([At this point GPUTARGET is $CUDAD $CUDALD]) + PAC_ARG_WITH_CUDACC() + if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="50,60,70,75"; + CUDA_CC="$pac_cv_cudacc"; + fi + CUDEFINES="--dopt=on"; + for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` + do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; + done + if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + fi fi if test "x$pac_cv_ipk_size" != "x4"; then AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU]) - SPGPU_DEFINES=""; + HAVE_CUDA="no"; + CUDA_CC=""; SPGPU_LIBS=""; CUDAD=""; CUDALD=""; @@ -896,9 +895,9 @@ AC_SUBST(FINCLUDES) PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) -CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) -FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -923,7 +922,6 @@ AC_SUBST(METISINCFILE) AC_SUBST(SPGPU_FLAGS) AC_SUBST(SPGPU_LIBS) dnl AC_SUBST(SPGPU_DIR) -AC_SUBST(SPGPU_DEFINES) dnl AC_SUBST(SPGPU_INCLUDES) dnl AC_SUBST(SPGPU_INCDIR) AC_SUBST(EXTRALDLIBS) @@ -962,6 +960,9 @@ AC_MSG_NOTICE([ FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${HAVE_CUDA} + CUDA_CC : ${CUDA_CC} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} From b2b7b074dff09c0b1df060f20dbcf53b52d5ff79 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 1 Dec 2023 15:11:17 +0100 Subject: [PATCH 020/110] Fix usage of HAVE_CUDA/HAVE_GPU (mostly disappeared) --- cuda/base_cusparse_mod.F90 | 4 - cuda/c_cusparse_mod.F90 | 5 -- cuda/ccusparse.c | 2 - cuda/cintrf.h | 4 - cuda/cuda_util.c | 9 --- cuda/cuda_util.h | 2 - cuda/cvectordev.c | 2 - cuda/cvectordev.h | 3 - cuda/d_cusparse_mod.F90 | 6 +- cuda/dcusparse.c | 2 - cuda/diagdev.c | 30 ------- cuda/diagdev.h | 5 -- cuda/diagdev_mod.F90 | 7 -- cuda/dnsdev.c | 62 --------------- cuda/dnsdev.h | 5 -- cuda/dnsdev_mod.F90 | 5 -- cuda/dvectordev.c | 4 - cuda/dvectordev.h | 3 - cuda/elldev.c | 87 --------------------- cuda/elldev.h | 6 -- cuda/elldev_mod.F90 | 5 -- cuda/fcusparse.c | 4 - cuda/fcusparse.h | 2 - cuda/hdiagdev.c | 39 --------- cuda/hdiagdev.h | 5 -- cuda/hdiagdev_mod.F90 | 4 - cuda/hlldev.c | 75 ------------------ cuda/hlldev.h | 5 -- cuda/hlldev_mod.F90 | 5 -- cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 | 6 -- cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 | 23 ------ cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 | 12 --- cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 | 8 -- cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_csmm.F90 | 8 -- cuda/impl/psb_c_cuda_csrg_csmv.F90 | 8 -- cuda/impl/psb_c_cuda_csrg_from_gpu.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_scal.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_scals.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_to_gpu.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_vect_mv.F90 | 8 -- cuda/impl/psb_c_cuda_diag_csmv.F90 | 9 --- cuda/impl/psb_c_cuda_diag_to_gpu.F90 | 8 -- cuda/impl/psb_c_cuda_diag_vect_mv.F90 | 10 --- cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 | 35 +-------- cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 | 14 ---- cuda/impl/psb_c_cuda_elg_asb.f90 | 1 - cuda/impl/psb_c_cuda_elg_csmm.F90 | 10 --- cuda/impl/psb_c_cuda_elg_csmv.F90 | 9 --- cuda/impl/psb_c_cuda_elg_csput.F90 | 9 --- cuda/impl/psb_c_cuda_elg_from_gpu.F90 | 9 +-- cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 | 7 +- cuda/impl/psb_c_cuda_elg_mold.F90 | 2 - cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 | 7 -- cuda/impl/psb_c_cuda_elg_scal.F90 | 7 -- cuda/impl/psb_c_cuda_elg_scals.F90 | 7 -- cuda/impl/psb_c_cuda_elg_to_gpu.F90 | 9 --- cuda/impl/psb_c_cuda_elg_trim.f90 | 1 - cuda/impl/psb_c_cuda_elg_vect_mv.F90 | 10 --- cuda/impl/psb_c_cuda_hdiag_csmv.F90 | 10 --- cuda/impl/psb_c_cuda_hdiag_mold.F90 | 1 - cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 | 10 --- cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 | 9 --- cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 | 9 --- cuda/impl/psb_c_cuda_hlg_csmm.F90 | 10 --- cuda/impl/psb_c_cuda_hlg_csmv.F90 | 9 --- cuda/impl/psb_c_cuda_hlg_from_gpu.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_mold.F90 | 1 - cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_scal.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_scals.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_to_gpu.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_vect_mv.F90 | 10 --- cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_csmm.F90 | 9 --- cuda/impl/psb_c_cuda_hybg_csmv.F90 | 8 -- cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_scal.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_scals.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_to_gpu.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_vect_mv.F90 | 9 --- cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 | 7 -- cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 | 7 -- cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 | 5 -- cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 | 15 +--- cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 | 23 ------ cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 | 12 --- cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 | 8 -- cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_csmm.F90 | 8 -- cuda/impl/psb_d_cuda_csrg_csmv.F90 | 8 -- cuda/impl/psb_d_cuda_csrg_from_gpu.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_scal.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_scals.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_to_gpu.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_vect_mv.F90 | 8 -- cuda/impl/psb_d_cuda_diag_csmv.F90 | 9 --- cuda/impl/psb_d_cuda_diag_to_gpu.F90 | 8 -- cuda/impl/psb_d_cuda_diag_vect_mv.F90 | 10 --- cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 | 35 +-------- cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 | 14 ---- cuda/impl/psb_d_cuda_elg_asb.f90 | 1 - cuda/impl/psb_d_cuda_elg_csmm.F90 | 10 --- cuda/impl/psb_d_cuda_elg_csmv.F90 | 9 --- cuda/impl/psb_d_cuda_elg_csput.F90 | 9 --- cuda/impl/psb_d_cuda_elg_from_gpu.F90 | 9 +-- cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 | 7 +- cuda/impl/psb_d_cuda_elg_mold.F90 | 2 - cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 | 7 -- cuda/impl/psb_d_cuda_elg_scal.F90 | 7 -- cuda/impl/psb_d_cuda_elg_scals.F90 | 7 -- cuda/impl/psb_d_cuda_elg_to_gpu.F90 | 9 --- cuda/impl/psb_d_cuda_elg_trim.f90 | 1 - cuda/impl/psb_d_cuda_elg_vect_mv.F90 | 10 --- cuda/impl/psb_d_cuda_hdiag_csmv.F90 | 10 --- cuda/impl/psb_d_cuda_hdiag_mold.F90 | 1 - cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 | 10 --- cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 | 9 --- cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 | 9 --- cuda/impl/psb_d_cuda_hlg_csmm.F90 | 10 --- cuda/impl/psb_d_cuda_hlg_csmv.F90 | 9 --- cuda/impl/psb_d_cuda_hlg_from_gpu.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_mold.F90 | 1 - cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_scal.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_scals.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_to_gpu.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_vect_mv.F90 | 10 --- cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_csmm.F90 | 9 --- cuda/impl/psb_d_cuda_hybg_csmv.F90 | 8 -- cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_scal.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_scals.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_to_gpu.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_vect_mv.F90 | 9 --- cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 | 7 -- cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 | 7 -- cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 | 5 -- cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 | 15 +--- cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 | 23 ------ cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 | 12 --- cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 | 8 -- cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_csmm.F90 | 8 -- cuda/impl/psb_s_cuda_csrg_csmv.F90 | 8 -- cuda/impl/psb_s_cuda_csrg_from_gpu.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_scal.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_scals.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_to_gpu.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_vect_mv.F90 | 8 -- cuda/impl/psb_s_cuda_diag_csmv.F90 | 9 --- cuda/impl/psb_s_cuda_diag_to_gpu.F90 | 8 -- cuda/impl/psb_s_cuda_diag_vect_mv.F90 | 10 --- cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 | 35 +-------- cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 | 14 ---- cuda/impl/psb_s_cuda_elg_asb.f90 | 1 - cuda/impl/psb_s_cuda_elg_csmm.F90 | 10 --- cuda/impl/psb_s_cuda_elg_csmv.F90 | 9 --- cuda/impl/psb_s_cuda_elg_csput.F90 | 9 --- cuda/impl/psb_s_cuda_elg_from_gpu.F90 | 9 +-- cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 | 7 +- cuda/impl/psb_s_cuda_elg_mold.F90 | 2 - cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 | 7 -- cuda/impl/psb_s_cuda_elg_scal.F90 | 7 -- cuda/impl/psb_s_cuda_elg_scals.F90 | 7 -- cuda/impl/psb_s_cuda_elg_to_gpu.F90 | 9 --- cuda/impl/psb_s_cuda_elg_trim.f90 | 1 - cuda/impl/psb_s_cuda_elg_vect_mv.F90 | 10 --- cuda/impl/psb_s_cuda_hdiag_csmv.F90 | 10 --- cuda/impl/psb_s_cuda_hdiag_mold.F90 | 1 - cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 | 10 --- cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 | 9 --- cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 | 9 --- cuda/impl/psb_s_cuda_hlg_csmm.F90 | 10 --- cuda/impl/psb_s_cuda_hlg_csmv.F90 | 9 --- cuda/impl/psb_s_cuda_hlg_from_gpu.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_mold.F90 | 1 - cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_scal.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_scals.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_to_gpu.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_vect_mv.F90 | 10 --- cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_csmm.F90 | 9 --- cuda/impl/psb_s_cuda_hybg_csmv.F90 | 8 -- cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_scal.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_scals.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_to_gpu.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_vect_mv.F90 | 9 --- cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 | 7 -- cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 | 7 -- cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 | 5 -- cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 | 15 +--- cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 | 23 ------ cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 | 12 --- cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 | 8 -- cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_csmm.F90 | 8 -- cuda/impl/psb_z_cuda_csrg_csmv.F90 | 8 -- cuda/impl/psb_z_cuda_csrg_from_gpu.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_scal.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_scals.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_to_gpu.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_vect_mv.F90 | 8 -- cuda/impl/psb_z_cuda_diag_csmv.F90 | 9 --- cuda/impl/psb_z_cuda_diag_to_gpu.F90 | 8 -- cuda/impl/psb_z_cuda_diag_vect_mv.F90 | 10 --- cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 | 35 +-------- cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 | 14 ---- cuda/impl/psb_z_cuda_elg_asb.f90 | 1 - cuda/impl/psb_z_cuda_elg_csmm.F90 | 10 --- cuda/impl/psb_z_cuda_elg_csmv.F90 | 9 --- cuda/impl/psb_z_cuda_elg_csput.F90 | 9 --- cuda/impl/psb_z_cuda_elg_from_gpu.F90 | 9 +-- cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 | 7 +- cuda/impl/psb_z_cuda_elg_mold.F90 | 2 - cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 | 7 -- cuda/impl/psb_z_cuda_elg_scal.F90 | 7 -- cuda/impl/psb_z_cuda_elg_scals.F90 | 7 -- cuda/impl/psb_z_cuda_elg_to_gpu.F90 | 9 --- cuda/impl/psb_z_cuda_elg_trim.f90 | 1 - cuda/impl/psb_z_cuda_elg_vect_mv.F90 | 10 --- cuda/impl/psb_z_cuda_hdiag_csmv.F90 | 10 --- cuda/impl/psb_z_cuda_hdiag_mold.F90 | 1 - cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 | 10 --- cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 | 9 --- cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 | 9 --- cuda/impl/psb_z_cuda_hlg_csmm.F90 | 10 --- cuda/impl/psb_z_cuda_hlg_csmv.F90 | 9 --- cuda/impl/psb_z_cuda_hlg_from_gpu.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_mold.F90 | 1 - cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_scal.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_scals.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_to_gpu.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_vect_mv.F90 | 10 --- cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_csmm.F90 | 9 --- cuda/impl/psb_z_cuda_hybg_csmv.F90 | 8 -- cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_scal.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_scals.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_to_gpu.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_vect_mv.F90 | 9 --- cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 | 7 -- cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 | 7 -- cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 | 5 -- cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 | 15 +--- cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 | 6 -- cuda/ivectordev.c | 4 - cuda/ivectordev.h | 3 - cuda/psb_base_vectordev_mod.F90 | 7 -- cuda/psb_c_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_c_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_c_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_vect_mod.F90 | 20 +---- cuda/psb_c_vectordev_mod.F90 | 4 - cuda/psb_cuda_env_mod.F90 | 10 --- cuda/psb_d_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_d_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_d_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_vect_mod.F90 | 20 +---- cuda/psb_d_vectordev_mod.F90 | 4 - cuda/psb_i_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_i_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_i_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_vect_mod.F90 | 18 +---- cuda/psb_i_vectordev_mod.F90 | 4 - cuda/psb_s_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_s_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_s_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_vect_mod.F90 | 20 +---- cuda/psb_s_vectordev_mod.F90 | 4 - cuda/psb_z_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_z_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_z_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_vect_mod.F90 | 20 +---- cuda/psb_z_vectordev_mod.F90 | 4 - cuda/s_cusparse_mod.F90 | 5 -- cuda/scusparse.c | 2 - cuda/svectordev.c | 3 - cuda/svectordev.h | 2 - cuda/vectordev.c | 3 - cuda/vectordev.h | 3 - cuda/z_cusparse_mod.F90 | 4 - cuda/zcusparse.c | 2 - cuda/zvectordev.c | 3 - cuda/zvectordev.h | 3 - 376 files changed, 26 insertions(+), 3547 deletions(-) diff --git a/cuda/base_cusparse_mod.F90 b/cuda/base_cusparse_mod.F90 index 9f5628be..94a8255f 100644 --- a/cuda/base_cusparse_mod.F90 +++ b/cuda/base_cusparse_mod.F90 @@ -79,9 +79,6 @@ module base_cusparse_mod enumerator cusparse_direction_column end enum - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface function FcusparseCreate() & & bind(c,name="FcusparseCreate") result(res) @@ -113,5 +110,4 @@ contains res = FcusparseDestroy() end function closeFcusparse -#endif end module base_cusparse_mod diff --git a/cuda/c_cusparse_mod.F90 b/cuda/c_cusparse_mod.F90 index e7d37173..59f37732 100644 --- a/cuda/c_cusparse_mod.F90 +++ b/cuda/c_cusparse_mod.F90 @@ -43,9 +43,6 @@ module c_cusparse_mod end type c_Hmat #endif - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface CSRGDeviceFree function c_CSRGDeviceFree(Mat) & & bind(c,name="c_CSRGDeviceFree") result(res) @@ -300,6 +297,4 @@ module c_cusparse_mod end interface #endif -#endif - end module c_cusparse_mod diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index 6f1cfdb3..c5430306 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include #include "cintrf.h" @@ -94,4 +93,3 @@ #include "fcusparse_fct.h" -#endif diff --git a/cuda/cintrf.h b/cuda/cintrf.h index 1a3528aa..3a1f6476 100644 --- a/cuda/cintrf.h +++ b/cuda/cintrf.h @@ -36,7 +36,6 @@ #include #include -#if defined(HAVE_SPGPU) && defined(HAVE_CUDA) #include "core.h" #include "cuda_util.h" #include "vector.h" @@ -45,7 +44,4 @@ #define ELL_PITCH_ALIGN_S 32 #define ELL_PITCH_ALIGN_D 16 - -#endif - #endif diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 0fe4a8b7..09265410 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -32,8 +32,6 @@ #include "cuda_util.h" -#if defined(HAVE_CUDA) - static int hasUVA=-1; static struct cudaDeviceProp *prop=NULL; @@ -440,10 +438,6 @@ void psb_cudaDestroyCublasHandle() psb_cublas_handle=NULL; } - - - - /* Simple memory tools */ int allocateInt(void **d_int, int n) @@ -803,6 +797,3 @@ double etime() } - - -#endif diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h index 789c08f4..95c8d1dc 100644 --- a/cuda/cuda_util.h +++ b/cuda/cuda_util.h @@ -38,7 +38,6 @@ #include #include -#if defined(HAVE_CUDA) #include "cuda_runtime.h" #include "core.h" #include "cuComplex.h" @@ -134,6 +133,5 @@ void freeDoubleComplex(void *); double etime(); -#endif #endif diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index 1dc16667..518154d5 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "cvectordev.h" @@ -321,5 +320,4 @@ int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) return(i); } -#endif diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h index f58fcca7..27c8984a 100644 --- a/cuda/cvectordev.h +++ b/cuda/cvectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include #include "cuComplex.h" @@ -77,5 +76,3 @@ int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); - -#endif diff --git a/cuda/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 index cd8bd52f..509253e6 100644 --- a/cuda/d_cusparse_mod.F90 +++ b/cuda/d_cusparse_mod.F90 @@ -43,9 +43,6 @@ module d_cusparse_mod end type d_Hmat #endif - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface CSRGDeviceFree function d_CSRGDeviceFree(Mat) & & bind(c,name="d_CSRGDeviceFree") result(res) @@ -298,8 +295,7 @@ module d_cusparse_mod integer(c_int) :: res end function d_HYBGHost2Device end interface -#endif - + #endif end module d_cusparse_mod diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index 9659c1f9..f14e787c 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include #include "cintrf.h" @@ -92,4 +91,3 @@ #include "fcusparse_fct.h" -#endif diff --git a/cuda/diagdev.c b/cuda/diagdev.c index a2acf1f4..0cf78a41 100644 --- a/cuda/diagdev.c +++ b/cuda/diagdev.c @@ -34,7 +34,6 @@ #include #include #include -#if defined(HAVE_SPGPU) //new DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, unsigned int diags, unsigned int elementType) { @@ -111,7 +110,6 @@ void freeDiagDevice(void* remoteMatrix) //new int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,unsigned int diags,unsigned int elementType) { int i; -#ifdef HAVE_SPGPU DiagDeviceParams p; p = getDiagDeviceParams(rows, columns, diags,elementType); @@ -120,15 +118,11 @@ int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,u fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n) { int i,fo,fa; char buf_a[255], buf_o[255],tmp[255]; -#ifdef HAVE_SPGPU struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; // Ex updateFromHost function /* memset(buf_a,'\0',255); */ @@ -159,14 +153,10 @@ int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n) return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDiagDeviceDouble(void* deviceMat, double* a, int* off) { int i; -#ifdef HAVE_SPGPU struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); @@ -174,9 +164,6 @@ int readDiagDeviceDouble(void* deviceMat, double* a, int* off) fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } //new @@ -188,7 +175,6 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -201,16 +187,12 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n) { int i,fo,fa; char buf_a[255], buf_o[255],tmp[255]; -#ifdef HAVE_SPGPU struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; // Ex updateFromHost function /* memset(buf_a,'\0',255); */ @@ -241,14 +223,10 @@ int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n) return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDiagDeviceFloat(void* deviceMat, float* a, int* off) { int i; -#ifdef HAVE_SPGPU struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); @@ -256,9 +234,6 @@ int readDiagDeviceFloat(void* deviceMat, float* a, int* off) fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } //new @@ -270,7 +245,6 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -283,9 +257,5 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } -#endif diff --git a/cuda/diagdev.h b/cuda/diagdev.h index 83f38289..2efbea92 100644 --- a/cuda/diagdev.h +++ b/cuda/diagdev.h @@ -32,7 +32,6 @@ #ifndef _DIAGDEV_H_ #define _DIAGDEV_H_ -#ifdef HAVE_SPGPU #include "cintrf.h" #include "dia.h" @@ -88,8 +87,4 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/diagdev_mod.F90 b/cuda/diagdev_mod.F90 index cbcc029e..70d58d4e 100644 --- a/cuda/diagdev_mod.F90 +++ b/cuda/diagdev_mod.F90 @@ -41,8 +41,6 @@ module diagdev_mod integer(c_int) :: firstIndex end type diagdev_parms -#ifdef HAVE_SPGPU - interface function FgetDiagDeviceParams(rows, columns, elementType, firstIndex) & & result(res) bind(c,name='getDiagDeviceParams') @@ -65,7 +63,6 @@ module diagdev_mod end function FallocDiagDevice end interface - interface writeDiagDevice function writeDiagDeviceFloat(deviceMat,a,off,n) & @@ -174,7 +171,6 @@ module diagdev_mod end function getDiagTimer end interface - interface function getDiagDevicePitch(deviceMat) & & bind(c,name='getDiagDevicePitch') result(res) @@ -225,7 +221,4 @@ module diagdev_mod end function spmvDiagDeviceDoubleComplex end interface spmvDiagDevice -#endif - - end module diagdev_mod diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c index 25cddc87..0a991012 100644 --- a/cuda/dnsdev.c +++ b/cuda/dnsdev.c @@ -31,8 +31,6 @@ #include #include "dnsdev.h" -#if defined(HAVE_SPGPU) - #define PASS_RS 0 #define IMIN(a,b) ((a)<(b) ? (a) : (b)) @@ -102,7 +100,6 @@ int FallocDnsDevice(void** deviceMat, unsigned int rows, unsigned int columns, unsigned int elementType, unsigned int firstIndex) { int i; -#ifdef HAVE_SPGPU DnsDeviceParams p; p = getDnsDeviceParams(rows, columns, elementType, firstIndex); @@ -111,9 +108,6 @@ int FallocDnsDevice(void** deviceMat, unsigned int rows, fprintf(stderr,"From routine : %s : %d \n","FallocDnsDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -124,7 +118,6 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; int status; -#ifdef HAVE_SPGPU cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); @@ -143,9 +136,6 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, @@ -155,7 +145,6 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; int status; -#ifdef HAVE_SPGPU cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); @@ -174,9 +163,6 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *alpha, @@ -186,7 +172,6 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; int status; -#ifdef HAVE_SPGPU cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); @@ -205,9 +190,6 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex *alpha, @@ -217,7 +199,6 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; int status; -#ifdef HAVE_SPGPU cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); @@ -236,15 +217,11 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasSetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); @@ -252,14 +229,10 @@ int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloat",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasSetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); @@ -267,15 +240,11 @@ int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasSetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); @@ -283,14 +252,10 @@ int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloatComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasSetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); @@ -298,15 +263,11 @@ int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, i fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDoubleComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasGetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); @@ -314,14 +275,10 @@ int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloat",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasGetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); @@ -329,15 +286,11 @@ int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasGetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); @@ -345,14 +298,10 @@ int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloatComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasGetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); @@ -360,24 +309,13 @@ int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, in fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDoubleComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int getDnsDevicePitch(void* deviceMat) { int i; struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; -#ifdef HAVE_SPGPU i = devMat->pitch; return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } - - -#endif - diff --git a/cuda/dnsdev.h b/cuda/dnsdev.h index 7c8b06c9..1c335bf9 100644 --- a/cuda/dnsdev.h +++ b/cuda/dnsdev.h @@ -33,7 +33,6 @@ #ifndef _DNSDEV_H_ #define _DNSDEV_H_ -#if defined(HAVE_SPGPU) #include "cintrf.h" #include "cuComplex.h" #include "cublas_v2.h" @@ -115,8 +114,4 @@ int getDnsDevicePitch(void* deviceMat); //int spmvDnsDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); //int spmvDnsDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/dnsdev_mod.F90 b/cuda/dnsdev_mod.F90 index 8b96b918..fd257e0e 100644 --- a/cuda/dnsdev_mod.F90 +++ b/cuda/dnsdev_mod.F90 @@ -44,8 +44,6 @@ module dnsdev_mod integer(c_int) :: firstIndex end type dnsdev_parms -#ifdef HAVE_SPGPU - interface function FgetDnsDeviceParams(rows, columns, elementType, firstIndex) & & result(res) bind(c,name='getDnsDeviceParams') @@ -269,7 +267,4 @@ module dnsdev_mod end interface -#endif - - end module dnsdev_mod diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index eae82c1e..39aa5b2a 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "dvectordev.h" @@ -300,6 +299,3 @@ int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) return(i); } - -#endif - diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h index 960958c5..25905c60 100644 --- a/cuda/dvectordev.h +++ b/cuda/dvectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include "vectordev.h" #include "cuda_runtime.h" @@ -74,5 +73,3 @@ int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA); int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, void *deviceVecB); - -#endif diff --git a/cuda/elldev.c b/cuda/elldev.c index eff89efa..3b79a863 100644 --- a/cuda/elldev.c +++ b/cuda/elldev.c @@ -31,8 +31,6 @@ #include #include "elldev.h" -#if defined(HAVE_SPGPU) - #define PASS_RS 0 EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize, @@ -140,7 +138,6 @@ int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize, unsigned int columns, unsigned int elementType, unsigned int firstIndex) { int i; -#ifdef HAVE_SPGPU EllDeviceParams p; p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex); @@ -149,9 +146,6 @@ int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize, fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS, @@ -182,7 +176,6 @@ int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; -#ifdef HAVE_SPGPU #ifdef VERBOSE __assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors"); __assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)"); @@ -196,9 +189,6 @@ int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, (float *)x->v_, beta, devMat->baseIndex); return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -234,7 +224,6 @@ int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; -#ifdef HAVE_SPGPU /*spgpuDellspmv (handle, (double*) y->v_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/ /* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */ /* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */ @@ -246,9 +235,6 @@ int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX, (double *)x->v_, beta, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } void @@ -281,7 +267,6 @@ int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; -#ifdef HAVE_SPGPU cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha)); cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta)); cspmdmm_gpu ((cuFloatComplex *)y->v_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, @@ -289,9 +274,6 @@ int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device (cuFloatComplex *)x->v_, b, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } void @@ -323,7 +305,6 @@ int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; -#ifdef HAVE_SPGPU cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha)); cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta)); zspmdmm_gpu ((cuDoubleComplex *)y->v_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, @@ -331,14 +312,10 @@ int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); @@ -350,14 +327,10 @@ int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); @@ -370,14 +343,10 @@ int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* ir fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); @@ -390,14 +359,10 @@ int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); @@ -410,14 +375,10 @@ int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -428,14 +389,10 @@ int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -445,14 +402,10 @@ int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -462,14 +415,10 @@ int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -479,32 +428,21 @@ int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, in fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int getEllDevicePitch(void* deviceMat) { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; -#ifdef HAVE_SPGPU i = devMat->pitch; //old //i = getPitchEllDevice(deviceMat); return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } int getEllDeviceMaxRowSize(void* deviceMat) { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; -#ifdef HAVE_SPGPU i = devMat->maxRowSize; return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -515,7 +453,6 @@ int getEllDeviceMaxRowSize(void* deviceMat) int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, int *idisp, int *ja, float *val, void *deviceMat) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; float *devVal; int *devIdisp, *devJa; @@ -548,9 +485,6 @@ int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -558,7 +492,6 @@ int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, int *idisp, int *ja, double *val, void *deviceMat) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; double *devVal; int *devIdisp, *devJa; @@ -591,16 +524,12 @@ int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, int *idisp, int *ja, float complex *val, void *deviceMat) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; float complex *devVal; int *devIdisp, *devJa; @@ -633,9 +562,6 @@ int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, in fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -643,7 +569,6 @@ int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, in int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, int *idisp, int *ja, double complex *val, void *deviceMat) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; double complex *devVal; int *devIdisp, *devJa; @@ -676,15 +601,11 @@ int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, i fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; @@ -699,13 +620,11 @@ int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void * devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1); -#endif return SPGPU_SUCCESS; } int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; @@ -720,7 +639,6 @@ int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1); -#endif return SPGPU_SUCCESS; } @@ -728,7 +646,6 @@ int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, void *ia, void *ja, void *val) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; @@ -743,14 +660,12 @@ int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1); -#endif return SPGPU_SUCCESS; } int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, void *ia, void *ja, void *val) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; @@ -765,9 +680,7 @@ int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1); -#endif return SPGPU_SUCCESS; } -#endif diff --git a/cuda/elldev.h b/cuda/elldev.h index 6a0814e2..c1001439 100644 --- a/cuda/elldev.h +++ b/cuda/elldev.h @@ -33,12 +33,10 @@ #ifndef _ELLDEV_H_ #define _ELLDEV_H_ -#if defined(HAVE_SPGPU) #include "cintrf.h" #include "cuComplex.h" #include "ell.h" - struct EllDevice { // Compressed matrix @@ -176,8 +174,4 @@ int getEllDevicePitch(void* deviceMat); //int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); //int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/elldev_mod.F90 b/cuda/elldev_mod.F90 index 49656d19..40cf8e49 100644 --- a/cuda/elldev_mod.F90 +++ b/cuda/elldev_mod.F90 @@ -44,8 +44,6 @@ module elldev_mod integer(c_int) :: firstIndex end type elldev_parms -#ifdef HAVE_SPGPU - interface function FgetEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex) & & result(res) bind(c,name='getEllDeviceParams') @@ -320,7 +318,4 @@ module elldev_mod end function spmvEllDeviceDoubleComplex end interface -#endif - - end module elldev_mod diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c index 5f0c12d9..c1b661ab 100644 --- a/cuda/fcusparse.c +++ b/cuda/fcusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include "cintrf.h" #include "fcusparse.h" @@ -72,6 +71,3 @@ cusparseHandle_t *getHandle() return(cusparse_handle); } - - -#endif diff --git a/cuda/fcusparse.h b/cuda/fcusparse.h index 2bab2aca..7d2972f8 100644 --- a/cuda/fcusparse.h +++ b/cuda/fcusparse.h @@ -33,7 +33,6 @@ #ifndef FCUSPARSE_ #define FCUSPARSE_ -#ifdef HAVE_SPGPU #include #if CUDA_SHORT_VERSION <= 10 #include @@ -67,4 +66,3 @@ cusparseHandle_t *getHandle(); } #endif -#endif diff --git a/cuda/hdiagdev.c b/cuda/hdiagdev.c index dd5a23cd..6302eed1 100644 --- a/cuda/hdiagdev.c +++ b/cuda/hdiagdev.c @@ -34,10 +34,7 @@ #include #include #include -#if defined(HAVE_SPGPU) #define DEBUG 0 - - void freeHdiagDevice(void* remoteMatrix) { struct HdiagDevice *devMat = (struct HdiagDevice *) remoteMatrix; @@ -138,7 +135,6 @@ int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, unsigned int allocationHeight, unsigned int hackSize, unsigned int hackCount, unsigned int elementType) { int i=0; -#ifdef HAVE_SPGPU HdiagDeviceParams p; p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType); @@ -152,17 +148,12 @@ int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif - } int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets) { int i=0,fo,fa,j,k,p; char buf_a[255], buf_o[255],tmp[255]; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; i=SPGPU_SUCCESS; @@ -216,9 +207,6 @@ int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int * return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -227,15 +215,12 @@ long long int sizeofHdiagDeviceDouble(void* deviceMat) { int i=0,fo,fa; int *hoff=NULL,*hackoff=NULL; long long int memsize=0; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; memsize += (devMat->hackCount+1)*sizeof(int); memsize += devMat->allocationHeight*sizeof(int); memsize += devMat->allocationHeight*devMat->hackSize*sizeof(double); - -#endif return(memsize); } @@ -243,7 +228,6 @@ long long int sizeofHdiagDeviceDouble(void* deviceMat) int readHdiagDeviceDouble(void* deviceMat, double* a, int* off) { int i; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); */ /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ @@ -253,9 +237,6 @@ int readHdiagDeviceDouble(void* deviceMat, double* a, int* off) fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, @@ -266,7 +247,6 @@ int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -285,15 +265,11 @@ int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets) { int i=0,fo,fa,j,k,p; char buf_a[255], buf_o[255],tmp[255]; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; i=SPGPU_SUCCESS; @@ -347,9 +323,6 @@ int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *ha return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -358,7 +331,6 @@ long long int sizeofHdiagDeviceFloat(void* deviceMat) { int i=0,fo,fa; int *hoff=NULL,*hackoff=NULL; long long int memsize=0; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; @@ -366,7 +338,6 @@ long long int sizeofHdiagDeviceFloat(void* deviceMat) memsize += devMat->allocationHeight*sizeof(int); memsize += devMat->allocationHeight*devMat->hackSize*sizeof(float); -#endif return(memsize); } @@ -374,7 +345,6 @@ long long int sizeofHdiagDeviceFloat(void* deviceMat) int readHdiagDeviceFloat(void* deviceMat, float* a, int* off) { int i; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); */ /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ @@ -384,9 +354,6 @@ int readHdiagDeviceFloat(void* deviceMat, float* a, int* off) fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, @@ -397,7 +364,6 @@ int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -416,10 +382,5 @@ int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } - -#endif diff --git a/cuda/hdiagdev.h b/cuda/hdiagdev.h index 4bce5066..5cd9f803 100644 --- a/cuda/hdiagdev.h +++ b/cuda/hdiagdev.h @@ -32,7 +32,6 @@ #ifndef _HDIAGDEV_H_ #define _HDIAGDEV_H_ -#ifdef HAVE_SPGPU #include "cintrf.h" #include "hdia.h" @@ -104,8 +103,4 @@ int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, double beta, void* deviceY); -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/hdiagdev_mod.F90 b/cuda/hdiagdev_mod.F90 index ad0f7cc5..9a3530e7 100644 --- a/cuda/hdiagdev_mod.F90 +++ b/cuda/hdiagdev_mod.F90 @@ -43,8 +43,6 @@ module hdiagdev_mod integer(c_int) :: allocationHeight end type hdiagdev_parms -#ifdef HAVE_SPGPU - ! interface computeHdiaHacksCount ! function computeHdiaHacksCountDouble(allocationHeight,hackOffsets,hackSize, & ! & diaValues,diaValuesPitch,diags,rows)& @@ -197,7 +195,5 @@ module hdiagdev_mod !!$ complex(c_double_complex),value :: alpha, beta !!$ end function spmvHdiagDeviceDoubleComplex end interface spmvHdiagDevice - -#endif end module hdiagdev_mod diff --git a/cuda/hlldev.c b/cuda/hlldev.c index 42f396dd..9da6a48c 100644 --- a/cuda/hlldev.c +++ b/cuda/hlldev.c @@ -30,7 +30,6 @@ #include "hlldev.h" -#if defined(HAVE_SPGPU) //new HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, unsigned int allocsize, unsigned int elementType, unsigned int firstIndex) @@ -147,7 +146,6 @@ int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, un unsigned int allocsize, unsigned int elementType, unsigned int firstIndex) { int i; -#ifdef HAVE_SPGPU HllDeviceParams p; p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex); @@ -156,9 +154,6 @@ int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, un fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -170,7 +165,6 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -185,9 +179,6 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX, devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } //new @@ -199,7 +190,6 @@ int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -214,9 +204,6 @@ int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX, devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex); //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, @@ -227,7 +214,6 @@ int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha)); cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta)); #ifdef VERBOSE @@ -244,9 +230,6 @@ int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, @@ -257,7 +240,6 @@ int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha)); cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta)); #ifdef VERBOSE @@ -271,14 +253,10 @@ int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); @@ -291,14 +269,10 @@ int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); @@ -311,14 +285,10 @@ int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); @@ -331,14 +301,10 @@ int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); @@ -351,14 +317,10 @@ int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -370,14 +332,10 @@ int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* i fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -388,14 +346,10 @@ int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -406,14 +360,10 @@ int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -424,9 +374,6 @@ int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, in fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } // New copy routines. @@ -435,7 +382,6 @@ int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz int *irn, int *hoffs, int *idisp, int *ja, float *val, void *deviceMat) { int i,j; -#ifdef HAVE_SPGPU spgpuHandle_t handle; HllDevice *devMat = (HllDevice *) deviceMat; float *devVal; @@ -469,16 +415,12 @@ int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, int *irn, int *hoffs, int *idisp, int *ja, double *val, void *deviceMat) { int i,j; -#ifdef HAVE_SPGPU spgpuHandle_t handle; HllDevice *devMat = (HllDevice *) deviceMat; double *devVal; @@ -517,16 +459,12 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, int *irn, int *hoffs, int *idisp, int *ja, float complex *val, void *deviceMat) { int i,j; -#ifdef HAVE_SPGPU spgpuHandle_t handle; HllDevice *devMat = (HllDevice *) deviceMat; float complex *devVal; @@ -560,16 +498,12 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, int *irn, int *hoffs, int *idisp, int *ja, double complex *val, void *deviceMat) { int i,j; -#ifdef HAVE_SPGPU spgpuHandle_t handle; HllDevice *devMat = (HllDevice *) deviceMat; double complex *devVal; @@ -603,13 +537,4 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } - - - - - -#endif diff --git a/cuda/hlldev.h b/cuda/hlldev.h index 478ad86e..e4f8259e 100644 --- a/cuda/hlldev.h +++ b/cuda/hlldev.h @@ -32,7 +32,6 @@ #ifndef _HLLDEV_H_ #define _HLLDEV_H_ -#ifdef HAVE_SPGPU #include "cintrf.h" #include "hell.h" @@ -154,8 +153,4 @@ int psi_cuda_z_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, int *idiag, int *rP, double complex *cM); -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/hlldev_mod.F90 b/cuda/hlldev_mod.F90 index 4eaa5ce0..90b8e13c 100644 --- a/cuda/hlldev_mod.F90 +++ b/cuda/hlldev_mod.F90 @@ -43,8 +43,6 @@ module hlldev_mod integer(c_int) :: firstIndex end type hlldev_parms -#ifdef HAVE_SPGPU - interface function bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex) & & result(res) bind(c,name='bldHllDeviceParams') @@ -267,7 +265,4 @@ module hlldev_mod end interface -#endif - - end module hlldev_mod diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 index af3301ff..aa6a3ba3 100644 --- a/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 @@ -32,12 +32,8 @@ subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -48,10 +44,8 @@ subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 index 47845e52..65b12a11 100644 --- a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 @@ -32,12 +32,8 @@ subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt -#else - use psb_c_cuda_csrg_mat_mod -#endif !use iso_c_binding implicit none @@ -53,9 +49,7 @@ subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) class default call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_c_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 index 5b1eb817..e70a044e 100644 --- a/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo -#else - use psb_c_cuda_diag_mat_mod -#endif implicit none class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) info = psb_success_ call a%psb_c_dia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 index fedffa22..c6105e88 100644 --- a/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 @@ -28,20 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo use psi_ext_util_mod use psb_cuda_env_mod -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,16 +51,11 @@ subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) integer(psb_ipk_), allocatable :: idisp(:) info = psb_success_ -#ifdef HAVE_SPGPU hacksize = max(1,psb_cuda_WarpSize()) -#else - hacksize = 1 -#endif if (b%is_dev()) call b%sync() if (b%is_by_rows()) then -#ifdef HAVE_SPGPU call psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) @@ -82,15 +71,8 @@ subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & & a%irn,idisp,b%ja,b%val, a%deviceMat) call a%set_dev() -#else - - call psi_c_convert_ell_from_coo(a,b,info,hacksize=hacksize) - call a%set_host() -#endif - else call b%cp_to_coo(tmp,info) -#ifdef HAVE_SPGPU call psi_c_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) @@ -107,11 +89,6 @@ subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) call a%set_dev() -#else - - call psi_c_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) - call a%set_host() -#endif end if if (info /= psb_success_) goto 9999 diff --git a/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 index 4c44d29a..f7e5351e 100644 --- a/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -51,9 +47,7 @@ subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ if (b%is_dev()) call b%sync() @@ -67,13 +61,9 @@ subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) @@ -88,9 +78,7 @@ subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) a%val(1:m,1:nzm) = b%val(1:m,1:nzm) end if a%nzt = nza -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default diff --git a/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 index 436eabaa..9be741c9 100644 --- a/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_c_cuda_hdiag_mat_mod -#endif implicit none class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a @@ -53,16 +47,12 @@ subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 index d30fccbf..8b0d9f2a 100644 --- a/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -61,11 +57,7 @@ subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() -#ifdef HAVE_SPGPU hksz = max(1,psb_cuda_WarpSize()) -#else - hksz = psi_get_hksz() -#endif if (b%is_by_rows()) then diff --git a/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 index 259364cd..96a5c5e8 100644 --- a/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) call a%cp_from_coo(b,info) class default call a%psb_c_hll_sparse_mat%cp_from_fmt(b,info) -#ifdef HAVE_SPGPU if (info == 0) call a%to_gpu(info) -#endif end select if (info /= 0) goto 9999 diff --git a/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 index 7ebb5240..d29da0b7 100644 --- a/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -49,10 +45,8 @@ subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 index 033ba966..9ed53040 100644 --- a/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) class default call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_c_cuda_cp_hybg_from_fmt diff --git a/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 index d9736d23..f1e002f3 100644 --- a/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) call a%psb_c_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() if (info == 0) call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_csmm.F90 b/cuda/impl/psb_c_cuda_csrg_csmm.F90 index 8f2f55b9..b3012952 100644 --- a/cuda/impl/psb_c_cuda_csrg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -94,7 +90,6 @@ subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -119,9 +114,6 @@ subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_csmv.F90 b/cuda/impl/psb_c_cuda_csrg_csmv.F90 index ba681401..ae90cb7e 100644 --- a/cuda/impl/psb_c_cuda_csrg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmv -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -96,7 +92,6 @@ subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -124,9 +119,6 @@ subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 index b1bed7e5..503bc57e 100644 --- a/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_csrg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_from_gpu -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +44,6 @@ subroutine psb_c_cuda_csrg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat%mat))) then call a%free() return @@ -68,6 +63,5 @@ subroutine psb_c_cuda_csrg_from_gpu(a,info) #endif call a%set_sync() -#endif end subroutine psb_c_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 index 32dec5ef..7e5bb614 100644 --- a/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 @@ -32,13 +32,9 @@ subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv -#else - use psb_c_cuda_csrg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a @@ -75,7 +71,6 @@ subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -112,12 +107,6 @@ subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='csrg_vect_sv') diff --git a/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 index 22f9f118..a757f477 100644 --- a/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) ! call a%psb_c_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_scal.F90 b/cuda/impl/psb_c_cuda_csrg_scal.F90 index 556a0ec5..13716339 100644 --- a/cuda/impl/psb_c_cuda_csrg_scal.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) @@ -58,10 +54,8 @@ subroutine psb_c_cuda_csrg_scal(d,a,info,side) call a%psb_c_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_scals.F90 b/cuda/impl/psb_c_cuda_csrg_scals.F90 index a67e91cd..5334be3d 100644 --- a/cuda/impl/psb_c_cuda_csrg_scals.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d @@ -56,10 +52,8 @@ subroutine psb_c_cuda_csrg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index ea710cbc..aebb07e4 100644 --- a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -320,6 +315,5 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in CSRG_TO_GPU ',info end if -#endif end subroutine psb_c_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 index cb556d20..c58e7ec0 100644 --- a/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv -#else - use psb_c_cuda_csrg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a @@ -72,7 +68,6 @@ subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= czero) then @@ -112,9 +107,6 @@ subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_diag_csmv.F90 b/cuda/impl/psb_c_cuda_diag_csmv.F90 index 00ab742d..c0940903 100644 --- a/cuda/impl/psb_c_cuda_diag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_diag_csmv.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv -#else - use psb_c_cuda_diag_mat_mod -#endif implicit none class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +90,6 @@ subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +116,6 @@ subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +124,4 @@ subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_c_cuda_diag_csmv diff --git a/cuda/impl/psb_c_cuda_diag_to_gpu.F90 b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 index 4f2c21d9..88bbd8b5 100644 --- a/cuda/impl/psb_c_cuda_diag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu -#else - use psb_c_cuda_diag_mat_mod -#endif use iso_c_binding implicit none class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a @@ -47,13 +43,10 @@ subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d -#ifdef HAVE_SPGPU type(diagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return n = size(a%data,1) @@ -69,6 +62,5 @@ subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) if (info == 0) info = & & writeDiagDevice(a%deviceMat,a%data,a%offset,n) ! if (info /= 0) goto 9999 -#endif end subroutine psb_c_cuda_diag_to_gpu diff --git a/cuda/impl/psb_c_cuda_diag_vect_mv.F90 b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 index 02bb9587..fba22bc5 100644 --- a/cuda/impl/psb_c_cuda_diag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv -#else - use psb_c_cuda_diag_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_diag_sparse_mat), intent(in) :: a @@ -71,7 +65,6 @@ subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +105,6 @@ subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 index bb2ec97b..7aaa37f1 100644 --- a/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 @@ -32,13 +32,9 @@ subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta @@ -123,13 +119,9 @@ end subroutine psb_c_cuda_dnsg_vect_mv subroutine psb_c_cuda_dnsg_mold(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b @@ -190,17 +182,12 @@ end subroutine psb_c_cuda_dnsg_mold !!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz !!$ end interface - subroutine psb_c_cuda_dnsg_to_gpu(a,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu -#else - use psb_c_cuda_dnsg_mat_mod -#endif class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda @@ -209,15 +196,12 @@ subroutine psb_c_cuda_dnsg_to_gpu(a,info) call psb_erractionsave(err_act) info = psb_success_ -#ifdef HAVE_SPGPU if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& & spgpu_type_complex_float,1) if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info - -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return @@ -233,13 +217,9 @@ end subroutine psb_c_cuda_dnsg_to_gpu subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -272,13 +252,9 @@ end subroutine psb_c_cuda_cp_dnsg_from_coo subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -348,13 +324,9 @@ end subroutine psb_c_cuda_cp_dnsg_from_fmt subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -383,18 +355,13 @@ subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) return end subroutine psb_c_cuda_mv_dnsg_from_coo - - + subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b diff --git a/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 index 01ca7189..7f50d547 100644 --- a/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -47,9 +41,7 @@ subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ @@ -74,13 +66,9 @@ subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) goto 9999 endif -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_float,1) ld = gpu_parms%pitch nz_ = gpu_parms%maxRowSize -#else - ld = m -#endif if (info == psb_success_) call psb_realloc(m,a%irn,info) if (info == psb_success_) call psb_realloc(m,a%idiag,info) @@ -98,10 +86,8 @@ subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) call a%set_dupl(psb_dupl_def_) end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_asb.f90 b/cuda/impl/psb_c_cuda_elg_asb.f90 index 24af1cc9..16d70736 100644 --- a/cuda/impl/psb_c_cuda_elg_asb.f90 +++ b/cuda/impl/psb_c_cuda_elg_asb.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_asb(a) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_elg_csmm.F90 b/cuda/impl/psb_c_cuda_elg_csmm.F90 index a5f0e3d5..f7ae9892 100644 --- a/cuda/impl/psb_c_cuda_elg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +87,6 @@ subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -119,9 +112,6 @@ subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_csmv.F90 b/cuda/impl/psb_c_cuda_elg_csmv.F90 index 00f39e8c..a23d4a60 100644 --- a/cuda/impl/psb_c_cuda_elg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmv -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -122,9 +116,6 @@ subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_csput.F90 b/cuda/impl/psb_c_cuda_elg_csput.F90 index cc6fc024..3da928f0 100644 --- a/cuda/impl/psb_c_cuda_elg_csput.F90 +++ b/cuda/impl/psb_c_cuda_elg_csput.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_a -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -128,13 +123,9 @@ subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v use psb_c_cuda_vect_mod -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_c_cuda_elg_from_gpu.F90 b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 index 593b52be..34c6e4a6 100644 --- a/cuda/impl/psb_c_cuda_elg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_c_cuda_elg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_c_cuda_elg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat))) then call a%free() return @@ -69,6 +63,5 @@ subroutine psb_c_cuda_elg_from_gpu(a,info) if (info == 0) info = & & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_c_cuda_elg_from_gpu diff --git a/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 index 43843dc6..148d72d2 100644 --- a/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 @@ -27,19 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! - +! subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv -#else - use psb_c_cuda_elg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_elg_sparse_mat), intent(in) :: a diff --git a/cuda/impl/psb_c_cuda_elg_mold.F90 b/cuda/impl/psb_c_cuda_elg_mold.F90 index b428055c..bb94bf07 100644 --- a/cuda/impl/psb_c_cuda_elg_mold.F90 +++ b/cuda/impl/psb_c_cuda_elg_mold.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_elg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 index b97530e1..6a1f8763 100644 --- a/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -64,10 +59,8 @@ subroutine psb_c_cuda_elg_reallocate_nz(nz,a) goto 9999 end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nzrm) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_scal.F90 b/cuda/impl/psb_c_cuda_elg_scal.F90 index b169451b..65f84768 100644 --- a/cuda/impl/psb_c_cuda_elg_scal.F90 +++ b/cuda/impl/psb_c_cuda_elg_scal.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) @@ -63,10 +58,8 @@ subroutine psb_c_cuda_elg_scal(d,a,info,side) call a%psb_c_ell_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_scals.F90 b/cuda/impl/psb_c_cuda_elg_scals.F90 index d20ee568..966f2e91 100644 --- a/cuda/impl/psb_c_cuda_elg_scals.F90 +++ b/cuda/impl/psb_c_cuda_elg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d @@ -59,10 +54,8 @@ subroutine psb_c_cuda_elg_scals(d,a,info) a%val(:,:) = a%val(:,:) * d -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_elg_to_gpu.F90 b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 index 5ea61a41..495207c7 100644 --- a/cuda/impl/psb_c_cuda_elg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 @@ -29,30 +29,22 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -88,6 +80,5 @@ subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm) if (info == 0) info = & & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_c_cuda_elg_to_gpu diff --git a/cuda/impl/psb_c_cuda_elg_trim.f90 b/cuda/impl/psb_c_cuda_elg_trim.f90 index 483e189d..78dbe193 100644 --- a/cuda/impl/psb_c_cuda_elg_trim.f90 +++ b/cuda/impl/psb_c_cuda_elg_trim.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_trim(a) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_elg_vect_mv.F90 b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 index b89ba5a2..9da6a34a 100644 --- a/cuda/impl/psb_c_cuda_elg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv -#else - use psb_c_cuda_elg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_elg_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() if (.not.x%is_host()) call x%sync() @@ -116,10 +110,6 @@ subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - if (a%is_dev()) call a%sync() - call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hdiag_csmv.F90 b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 index 4ea2c269..36928062 100644 --- a/cuda/impl/psb_c_cuda_hdiag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv -#else - use psb_c_cuda_hdiag_mat_mod -#endif implicit none class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +123,4 @@ subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_c_cuda_hdiag_csmv diff --git a/cuda/impl/psb_c_cuda_hdiag_mold.F90 b/cuda/impl/psb_c_cuda_hdiag_mold.F90 index 67e0b92e..27402cfc 100644 --- a/cuda/impl/psb_c_cuda_hdiag_mold.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hdiag_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 index 63ab178a..8d1b61a1 100644 --- a/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 @@ -29,29 +29,21 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hdiag_to_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_to_gpu -#else - use psb_c_cuda_hdiag_mat_mod -#endif use iso_c_binding implicit none class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight -#ifdef HAVE_SPGPU type(hdiagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU nr = a%get_nrows() nc = a%get_ncols() hacksize = a%hackSize @@ -81,6 +73,4 @@ subroutine psb_c_cuda_hdiag_to_gpu(a,info) if (info == 0) info = & & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) -#endif - end subroutine psb_c_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 index fb80611f..0c7ce856 100644 --- a/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_vect_mv -#else - use psb_c_cuda_hdiag_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 index 277b974f..1b41f132 100644 --- a/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_allocate_mnnz -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -47,19 +42,15 @@ subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(hlldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ call a%psb_c_hll_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_csmm.F90 b/cuda/impl/psb_c_cuda_hlg_csmm.F90 index f351ffd0..88aa53a8 100644 --- a/cuda/impl/psb_c_cuda_hlg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmm -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -93,7 +88,6 @@ subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -118,9 +112,6 @@ subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -128,5 +119,4 @@ subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) return - end subroutine psb_c_cuda_hlg_csmm diff --git a/cuda/impl/psb_c_cuda_hlg_csmv.F90 b/cuda/impl/psb_c_cuda_hlg_csmv.F90 index d39e5f51..18db6ad1 100644 --- a/cuda/impl/psb_c_cuda_hlg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmv -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 index f823153d..d06d5488 100644 --- a/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_from_gpu -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_c_cuda_hlg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (a%is_sync()) return if (a%is_host()) return if (.not.(c_associated(a%deviceMat))) then @@ -71,6 +65,5 @@ subroutine psb_c_cuda_hlg_from_gpu(a,info) if (info == 0) info = & & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_c_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 index 6202885c..87d7c662 100644 --- a/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_inner_vect_sv -#else - use psb_c_cuda_hlg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,11 +64,9 @@ subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) goto 9999 end if - call psb_erractionrestore(err_act) return - 9999 call psb_error_handler(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_mold.F90 b/cuda/impl/psb_c_cuda_hlg_mold.F90 index 85453422..a702e211 100644 --- a/cuda/impl/psb_c_cuda_hlg_mold.F90 +++ b/cuda/impl/psb_c_cuda_hlg_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 index 848b659d..2ec5fa2c 100644 --- a/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_reallocate_nz -#else - use psb_c_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz @@ -52,10 +47,8 @@ subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) call a%psb_c_hll_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_scal.F90 b/cuda/impl/psb_c_cuda_hlg_scal.F90 index d768048f..770d1734 100644 --- a/cuda/impl/psb_c_cuda_hlg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scal.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scal -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) @@ -60,10 +55,8 @@ subroutine psb_c_cuda_hlg_scal(d,a,info,side) call a%psb_c_hll_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_scals.F90 b/cuda/impl/psb_c_cuda_hlg_scals.F90 index 7574bf94..ef6bc1e3 100644 --- a/cuda/impl/psb_c_cuda_hlg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scals -#else - use psb_c_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -59,10 +54,8 @@ subroutine psb_c_cuda_hlg_scals(d,a,info) call a%psb_c_hll_sparse_mat%scal(d,info) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 index d7fc8fb2..d7d179e7 100644 --- a/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_to_gpu -#else - use psb_c_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -50,7 +45,6 @@ subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return n = a%get_nrows() @@ -63,6 +57,5 @@ subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) if (info == 0) info = & & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) ! if (info /= 0) goto 9999 -#endif end subroutine psb_c_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 index 2d7a679e..3789ef17 100644 --- a/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_vect_mv -#else - use psb_c_cuda_hlg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,9 +64,7 @@ subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) goto 9999 endif - tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= czero) then @@ -115,9 +108,6 @@ subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 index eced26e0..bc9f3889 100644 --- a/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_allocate_mnnz -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) call a%psb_c_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_csmm.F90 b/cuda/impl/psb_c_cuda_hybg_csmm.F90 index cc459f66..227b7d5c 100644 --- a/cuda/impl/psb_c_cuda_hybg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmm -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +88,6 @@ subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_csmv.F90 b/cuda/impl/psb_c_cuda_hybg_csmv.F90 index ab07d756..e1084022 100644 --- a/cuda/impl/psb_c_cuda_hybg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmv -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -95,7 +91,6 @@ subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -123,9 +118,6 @@ subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 index fcaf49ff..e19aafeb 100644 --- a/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_inner_vect_sv -#else - use psb_c_cuda_hybg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a @@ -76,7 +72,6 @@ subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=czero)) then call x%sync() call y%sync() @@ -113,12 +108,6 @@ subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='hybg_vect_sv') diff --git a/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 index 979eaad8..15f33077 100644 --- a/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_reallocate_nz -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) ! call a%psb_c_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_scal.F90 b/cuda/impl/psb_c_cuda_hybg_scal.F90 index ac4d788e..ff1c1515 100644 --- a/cuda/impl/psb_c_cuda_hybg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scal -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) @@ -60,10 +56,8 @@ subroutine psb_c_cuda_hybg_scal(d,a,info,side) call a%psb_c_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_scals.F90 b/cuda/impl/psb_c_cuda_hybg_scals.F90 index 7def71d2..1b2ddf32 100644 --- a/cuda/impl/psb_c_cuda_hybg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scals -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d @@ -60,10 +56,8 @@ subroutine psb_c_cuda_hybg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 index 1a77586e..15a65abc 100644 --- a/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_to_gpu -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -148,7 +143,6 @@ subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in HYBG_TO_GPU ',info end if -#endif end subroutine psb_c_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 index da20ca41..58ce0386 100644 --- a/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_vect_mv -#else - use psb_c_cuda_hybg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a @@ -71,8 +67,6 @@ subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') - -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= czero) then @@ -112,9 +106,6 @@ subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 index f80a8f87..f0a74c09 100644 --- a/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_coo -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -51,9 +46,7 @@ subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif if (info /= 0) goto 9999 return diff --git a/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 index 1f23a6c2..eb3698b0 100644 --- a/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_fmt -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,9 +50,7 @@ subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) class default call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_c_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 index e20e0b0a..c1ee2ba9 100644 --- a/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_mv_diag_from_coo -#else - use psb_c_cuda_diag_mat_mod -#endif implicit none diff --git a/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 index 741058cd..f9555729 100644 --- a/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_coo -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,5 +52,4 @@ subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) return - end subroutine psb_c_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 index b375bc63..59615e25 100644 --- a/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_fmt -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -49,9 +44,7 @@ subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) !locals type(psb_c_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ @@ -65,13 +58,9 @@ subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat call move_alloc(b%irn, a%irn) call move_alloc(b%idiag, a%idiag) @@ -87,9 +76,7 @@ subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) end if a%nzt = nza call b%free() -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default call b%mv_to_coo(tmp,info) diff --git a/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 index 8826081f..21ee731d 100644 --- a/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_mv_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_c_cuda_hdiag_mat_mod -#endif implicit none @@ -54,16 +48,12 @@ subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_c_hdia_sparse_mat%mv_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 index 416bbaed..50c20fad 100644 --- a/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_coo -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 index aafe692d..3fba905a 100644 --- a/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_fmt -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 index eb5ba685..f7cdae40 100644 --- a/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_coo -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 index d74e89bd..9be9f0b4 100644 --- a/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_fmt -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -54,9 +50,7 @@ subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) class default call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_c_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 index e3383af1..ab3a7256 100644 --- a/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 @@ -32,12 +32,8 @@ subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_coo -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -48,10 +44,8 @@ subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 index 28b46c76..d030538e 100644 --- a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 @@ -32,12 +32,8 @@ subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_fmt -#else - use psb_d_cuda_csrg_mat_mod -#endif !use iso_c_binding implicit none @@ -53,9 +49,7 @@ subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) class default call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_d_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 index d21bb469..dc0401d5 100644 --- a/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_cp_diag_from_coo -#else - use psb_d_cuda_diag_mat_mod -#endif implicit none class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) info = psb_success_ call a%psb_d_dia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 index a4d58297..890bdc39 100644 --- a/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 @@ -28,20 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_coo use psi_ext_util_mod use psb_cuda_env_mod -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,16 +51,11 @@ subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) integer(psb_ipk_), allocatable :: idisp(:) info = psb_success_ -#ifdef HAVE_SPGPU hacksize = max(1,psb_cuda_WarpSize()) -#else - hacksize = 1 -#endif if (b%is_dev()) call b%sync() if (b%is_by_rows()) then -#ifdef HAVE_SPGPU call psi_d_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) @@ -82,15 +71,8 @@ subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & & a%irn,idisp,b%ja,b%val, a%deviceMat) call a%set_dev() -#else - - call psi_d_convert_ell_from_coo(a,b,info,hacksize=hacksize) - call a%set_host() -#endif - else call b%cp_to_coo(tmp,info) -#ifdef HAVE_SPGPU call psi_d_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) @@ -107,11 +89,6 @@ subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) call a%set_dev() -#else - - call psi_d_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) - call a%set_host() -#endif end if if (info /= psb_success_) goto 9999 diff --git a/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 index 31786c1b..7beea7f1 100644 --- a/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_fmt -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -51,9 +47,7 @@ subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ if (b%is_dev()) call b%sync() @@ -67,13 +61,9 @@ subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) @@ -88,9 +78,7 @@ subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) a%val(1:m,1:nzm) = b%val(1:m,1:nzm) end if a%nzt = nza -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default diff --git a/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 index efcf9d66..82ef4876 100644 --- a/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_cp_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_d_cuda_hdiag_mat_mod -#endif implicit none class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a @@ -53,16 +47,12 @@ subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_d_hdia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 index 2fc898b2..34b999a9 100644 --- a/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_coo -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -61,11 +57,7 @@ subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() -#ifdef HAVE_SPGPU hksz = max(1,psb_cuda_WarpSize()) -#else - hksz = psi_get_hksz() -#endif if (b%is_by_rows()) then diff --git a/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 index 0796630c..ecb15157 100644 --- a/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_fmt -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) call a%cp_from_coo(b,info) class default call a%psb_d_hll_sparse_mat%cp_from_fmt(b,info) -#ifdef HAVE_SPGPU if (info == 0) call a%to_gpu(info) -#endif end select if (info /= 0) goto 9999 diff --git a/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 index f1f62a89..b236a91d 100644 --- a/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_coo -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -49,10 +45,8 @@ subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 index 37c9cc42..87d25252 100644 --- a/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_fmt -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) class default call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_d_cuda_cp_hybg_from_fmt diff --git a/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 index 3858672c..056f2deb 100644 --- a/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_allocate_mnnz -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) call a%psb_d_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() if (info == 0) call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_csmm.F90 b/cuda/impl/psb_d_cuda_csrg_csmm.F90 index 58251d9a..ddac1373 100644 --- a/cuda/impl/psb_d_cuda_csrg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmm -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -94,7 +90,6 @@ subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -119,9 +114,6 @@ subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_csmv.F90 b/cuda/impl/psb_d_cuda_csrg_csmv.F90 index 269760f0..c1c889f8 100644 --- a/cuda/impl/psb_d_cuda_csrg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmv -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -96,7 +92,6 @@ subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -124,9 +119,6 @@ subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 index c451a99f..9d1c2285 100644 --- a/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_csrg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_from_gpu -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +44,6 @@ subroutine psb_d_cuda_csrg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat%mat))) then call a%free() return @@ -68,6 +63,5 @@ subroutine psb_d_cuda_csrg_from_gpu(a,info) #endif call a%set_sync() -#endif end subroutine psb_d_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 index 60ee541f..9a45ee17 100644 --- a/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 @@ -32,13 +32,9 @@ subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_inner_vect_sv -#else - use psb_d_cuda_csrg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a @@ -75,7 +71,6 @@ subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -112,12 +107,6 @@ subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='csrg_vect_sv') diff --git a/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 index dbf34958..c27cb943 100644 --- a/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_reallocate_nz -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) ! call a%psb_d_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_scal.F90 b/cuda/impl/psb_d_cuda_csrg_scal.F90 index 73e1b9f3..860e9396 100644 --- a/cuda/impl/psb_d_cuda_csrg_scal.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scal -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) @@ -58,10 +54,8 @@ subroutine psb_d_cuda_csrg_scal(d,a,info,side) call a%psb_d_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_scals.F90 b/cuda/impl/psb_d_cuda_csrg_scals.F90 index cf8d6270..87ef588d 100644 --- a/cuda/impl/psb_d_cuda_csrg_scals.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scals -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d @@ -56,10 +52,8 @@ subroutine psb_d_cuda_csrg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index a0e72cb4..d1949421 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_to_gpu -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -320,6 +315,5 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in CSRG_TO_GPU ',info end if -#endif end subroutine psb_d_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 index b828d878..03fefbdd 100644 --- a/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_vect_mv -#else - use psb_d_cuda_csrg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a @@ -72,7 +68,6 @@ subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +107,6 @@ subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_diag_csmv.F90 b/cuda/impl/psb_d_cuda_diag_csmv.F90 index 8b49769e..0317a369 100644 --- a/cuda/impl/psb_d_cuda_diag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_diag_csmv.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_csmv -#else - use psb_d_cuda_diag_mat_mod -#endif implicit none class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +90,6 @@ subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +116,6 @@ subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +124,4 @@ subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_d_cuda_diag_csmv diff --git a/cuda/impl/psb_d_cuda_diag_to_gpu.F90 b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 index 4903de8a..9b648962 100644 --- a/cuda/impl/psb_d_cuda_diag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_to_gpu -#else - use psb_d_cuda_diag_mat_mod -#endif use iso_c_binding implicit none class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a @@ -47,13 +43,10 @@ subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d -#ifdef HAVE_SPGPU type(diagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return n = size(a%data,1) @@ -69,6 +62,5 @@ subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) if (info == 0) info = & & writeDiagDevice(a%deviceMat,a%data,a%offset,n) ! if (info /= 0) goto 9999 -#endif end subroutine psb_d_cuda_diag_to_gpu diff --git a/cuda/impl/psb_d_cuda_diag_vect_mv.F90 b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 index 0f23d363..3bc2372d 100644 --- a/cuda/impl/psb_d_cuda_diag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_vect_mv -#else - use psb_d_cuda_diag_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_diag_sparse_mat), intent(in) :: a @@ -71,7 +65,6 @@ subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +105,6 @@ subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 index 8d922d82..e90e816a 100644 --- a/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 @@ -32,13 +32,9 @@ subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_vect_mv -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta @@ -123,13 +119,9 @@ end subroutine psb_d_cuda_dnsg_vect_mv subroutine psb_d_cuda_dnsg_mold(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_mold -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b @@ -190,17 +182,12 @@ end subroutine psb_d_cuda_dnsg_mold !!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz !!$ end interface - subroutine psb_d_cuda_dnsg_to_gpu(a,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_to_gpu -#else - use psb_d_cuda_dnsg_mat_mod -#endif class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda @@ -209,15 +196,12 @@ subroutine psb_d_cuda_dnsg_to_gpu(a,info) call psb_erractionsave(err_act) info = psb_success_ -#ifdef HAVE_SPGPU if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& & spgpu_type_double,1) if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info - -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return @@ -233,13 +217,9 @@ end subroutine psb_d_cuda_dnsg_to_gpu subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_coo -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -272,13 +252,9 @@ end subroutine psb_d_cuda_cp_dnsg_from_coo subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_fmt -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -348,13 +324,9 @@ end subroutine psb_d_cuda_cp_dnsg_from_fmt subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_coo -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -383,18 +355,13 @@ subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) return end subroutine psb_d_cuda_mv_dnsg_from_coo - - + subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_fmt -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b diff --git a/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 index b9308514..6db20c96 100644 --- a/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_allocate_mnnz -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -47,9 +41,7 @@ subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ @@ -74,13 +66,9 @@ subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) goto 9999 endif -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_double,1) ld = gpu_parms%pitch nz_ = gpu_parms%maxRowSize -#else - ld = m -#endif if (info == psb_success_) call psb_realloc(m,a%irn,info) if (info == psb_success_) call psb_realloc(m,a%idiag,info) @@ -98,10 +86,8 @@ subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) call a%set_dupl(psb_dupl_def_) end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_asb.f90 b/cuda/impl/psb_d_cuda_elg_asb.f90 index c158ccde..7d510ee2 100644 --- a/cuda/impl/psb_d_cuda_elg_asb.f90 +++ b/cuda/impl/psb_d_cuda_elg_asb.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_asb(a) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_elg_csmm.F90 b/cuda/impl/psb_d_cuda_elg_csmm.F90 index 2d9883fa..f77d72d8 100644 --- a/cuda/impl/psb_d_cuda_elg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmm -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +87,6 @@ subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -119,9 +112,6 @@ subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_csmv.F90 b/cuda/impl/psb_d_cuda_elg_csmv.F90 index 6420e28d..351ad99d 100644 --- a/cuda/impl/psb_d_cuda_elg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmv -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -122,9 +116,6 @@ subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_csput.F90 b/cuda/impl/psb_d_cuda_elg_csput.F90 index 19d26c43..0d16de1f 100644 --- a/cuda/impl/psb_d_cuda_elg_csput.F90 +++ b/cuda/impl/psb_d_cuda_elg_csput.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_a -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -128,13 +123,9 @@ subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_v use psb_d_cuda_vect_mod -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_d_cuda_elg_from_gpu.F90 b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 index b532a83c..720a6d73 100644 --- a/cuda/impl/psb_d_cuda_elg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_d_cuda_elg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_from_gpu -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_d_cuda_elg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat))) then call a%free() return @@ -69,6 +63,5 @@ subroutine psb_d_cuda_elg_from_gpu(a,info) if (info == 0) info = & & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_d_cuda_elg_from_gpu diff --git a/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 index c262969f..5e5d72ef 100644 --- a/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 @@ -27,19 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! - +! subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_inner_vect_sv -#else - use psb_d_cuda_elg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_elg_sparse_mat), intent(in) :: a diff --git a/cuda/impl/psb_d_cuda_elg_mold.F90 b/cuda/impl/psb_d_cuda_elg_mold.F90 index f887f96f..107f19af 100644 --- a/cuda/impl/psb_d_cuda_elg_mold.F90 +++ b/cuda/impl/psb_d_cuda_elg_mold.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_elg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 index 66c583e1..47464760 100644 --- a/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_reallocate_nz -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -64,10 +59,8 @@ subroutine psb_d_cuda_elg_reallocate_nz(nz,a) goto 9999 end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nzrm) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_scal.F90 b/cuda/impl/psb_d_cuda_elg_scal.F90 index 7aa21c93..420c710e 100644 --- a/cuda/impl/psb_d_cuda_elg_scal.F90 +++ b/cuda/impl/psb_d_cuda_elg_scal.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scal -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) @@ -63,10 +58,8 @@ subroutine psb_d_cuda_elg_scal(d,a,info,side) call a%psb_d_ell_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_scals.F90 b/cuda/impl/psb_d_cuda_elg_scals.F90 index 1950b366..ff22002e 100644 --- a/cuda/impl/psb_d_cuda_elg_scals.F90 +++ b/cuda/impl/psb_d_cuda_elg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scals -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d @@ -59,10 +54,8 @@ subroutine psb_d_cuda_elg_scals(d,a,info) a%val(:,:) = a%val(:,:) * d -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_elg_to_gpu.F90 b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 index b589ec2d..9b88af69 100644 --- a/cuda/impl/psb_d_cuda_elg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 @@ -29,30 +29,22 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_to_gpu -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -88,6 +80,5 @@ subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm) if (info == 0) info = & & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_d_cuda_elg_to_gpu diff --git a/cuda/impl/psb_d_cuda_elg_trim.f90 b/cuda/impl/psb_d_cuda_elg_trim.f90 index be573c8c..a371c673 100644 --- a/cuda/impl/psb_d_cuda_elg_trim.f90 +++ b/cuda/impl/psb_d_cuda_elg_trim.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_trim(a) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_elg_vect_mv.F90 b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 index 1be57d22..f0b83c2b 100644 --- a/cuda/impl/psb_d_cuda_elg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_vect_mv -#else - use psb_d_cuda_elg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_elg_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() if (.not.x%is_host()) call x%sync() @@ -116,10 +110,6 @@ subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - if (a%is_dev()) call a%sync() - call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hdiag_csmv.F90 b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 index 4bcd6e7a..bf4dacc1 100644 --- a/cuda/impl/psb_d_cuda_hdiag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_csmv -#else - use psb_d_cuda_hdiag_mat_mod -#endif implicit none class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +123,4 @@ subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_d_cuda_hdiag_csmv diff --git a/cuda/impl/psb_d_cuda_hdiag_mold.F90 b/cuda/impl/psb_d_cuda_hdiag_mold.F90 index c5028c07..b858b56d 100644 --- a/cuda/impl/psb_d_cuda_hdiag_mold.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hdiag_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 index ca79b9fa..73c4a47d 100644 --- a/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 @@ -29,29 +29,21 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hdiag_to_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_to_gpu -#else - use psb_d_cuda_hdiag_mat_mod -#endif use iso_c_binding implicit none class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight -#ifdef HAVE_SPGPU type(hdiagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU nr = a%get_nrows() nc = a%get_ncols() hacksize = a%hackSize @@ -81,6 +73,4 @@ subroutine psb_d_cuda_hdiag_to_gpu(a,info) if (info == 0) info = & & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) -#endif - end subroutine psb_d_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 index 74233f90..c18c80ac 100644 --- a/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_vect_mv -#else - use psb_d_cuda_hdiag_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 index 3382327f..68d9ab50 100644 --- a/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_allocate_mnnz -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -47,19 +42,15 @@ subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(hlldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ call a%psb_d_hll_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_csmm.F90 b/cuda/impl/psb_d_cuda_hlg_csmm.F90 index a223aace..ee8424e6 100644 --- a/cuda/impl/psb_d_cuda_hlg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmm -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -93,7 +88,6 @@ subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -118,9 +112,6 @@ subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -128,5 +119,4 @@ subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) return - end subroutine psb_d_cuda_hlg_csmm diff --git a/cuda/impl/psb_d_cuda_hlg_csmv.F90 b/cuda/impl/psb_d_cuda_hlg_csmv.F90 index 04779296..58892c1f 100644 --- a/cuda/impl/psb_d_cuda_hlg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmv -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 index 7c1a2de8..752fd944 100644 --- a/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_from_gpu -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_d_cuda_hlg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (a%is_sync()) return if (a%is_host()) return if (.not.(c_associated(a%deviceMat))) then @@ -71,6 +65,5 @@ subroutine psb_d_cuda_hlg_from_gpu(a,info) if (info == 0) info = & & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_d_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 index c6bd68b5..f4a0424d 100644 --- a/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_inner_vect_sv -#else - use psb_d_cuda_hlg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,11 +64,9 @@ subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) goto 9999 end if - call psb_erractionrestore(err_act) return - 9999 call psb_error_handler(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_mold.F90 b/cuda/impl/psb_d_cuda_hlg_mold.F90 index dddce134..e41d56f2 100644 --- a/cuda/impl/psb_d_cuda_hlg_mold.F90 +++ b/cuda/impl/psb_d_cuda_hlg_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 index aa2954d6..e696d304 100644 --- a/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_reallocate_nz -#else - use psb_d_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz @@ -52,10 +47,8 @@ subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) call a%psb_d_hll_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_scal.F90 b/cuda/impl/psb_d_cuda_hlg_scal.F90 index 3cbfada0..042e5805 100644 --- a/cuda/impl/psb_d_cuda_hlg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scal.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scal -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) @@ -60,10 +55,8 @@ subroutine psb_d_cuda_hlg_scal(d,a,info,side) call a%psb_d_hll_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_scals.F90 b/cuda/impl/psb_d_cuda_hlg_scals.F90 index 1ddf764f..4c81faa9 100644 --- a/cuda/impl/psb_d_cuda_hlg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scals -#else - use psb_d_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -59,10 +54,8 @@ subroutine psb_d_cuda_hlg_scals(d,a,info) call a%psb_d_hll_sparse_mat%scal(d,info) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 index 82737315..566c94bd 100644 --- a/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_to_gpu -#else - use psb_d_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -50,7 +45,6 @@ subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return n = a%get_nrows() @@ -63,6 +57,5 @@ subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) if (info == 0) info = & & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) ! if (info /= 0) goto 9999 -#endif end subroutine psb_d_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 index 9d0741c4..cccba74b 100644 --- a/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_vect_mv -#else - use psb_d_cuda_hlg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,9 +64,7 @@ subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) goto 9999 endif - tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -115,9 +108,6 @@ subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 index b0bff6c0..50bef667 100644 --- a/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_allocate_mnnz -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) call a%psb_d_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_csmm.F90 b/cuda/impl/psb_d_cuda_hybg_csmm.F90 index 3fcfd17f..d5b49829 100644 --- a/cuda/impl/psb_d_cuda_hybg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmm -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +88,6 @@ subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_csmv.F90 b/cuda/impl/psb_d_cuda_hybg_csmv.F90 index 5e06f633..b29b889a 100644 --- a/cuda/impl/psb_d_cuda_hybg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmv -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -95,7 +91,6 @@ subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -123,9 +118,6 @@ subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 index a30c1abe..6f5eba3b 100644 --- a/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_inner_vect_sv -#else - use psb_d_cuda_hybg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a @@ -76,7 +72,6 @@ subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -113,12 +108,6 @@ subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='hybg_vect_sv') diff --git a/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 index cadce8d3..f44c3f10 100644 --- a/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_reallocate_nz -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) ! call a%psb_d_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_scal.F90 b/cuda/impl/psb_d_cuda_hybg_scal.F90 index 126e25cb..5be8d801 100644 --- a/cuda/impl/psb_d_cuda_hybg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scal -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) @@ -60,10 +56,8 @@ subroutine psb_d_cuda_hybg_scal(d,a,info,side) call a%psb_d_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_scals.F90 b/cuda/impl/psb_d_cuda_hybg_scals.F90 index 88b7e05c..59e21dd2 100644 --- a/cuda/impl/psb_d_cuda_hybg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scals -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d @@ -60,10 +56,8 @@ subroutine psb_d_cuda_hybg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 index d94a75c7..7b8e2e5f 100644 --- a/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_to_gpu -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -148,7 +143,6 @@ subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in HYBG_TO_GPU ',info end if -#endif end subroutine psb_d_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 index 9d0aedb7..f1119439 100644 --- a/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_vect_mv -#else - use psb_d_cuda_hybg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a @@ -71,8 +67,6 @@ subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') - -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 index 18e7c636..559bfb2c 100644 --- a/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_coo -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -51,9 +46,7 @@ subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif if (info /= 0) goto 9999 return diff --git a/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 index 837c78c1..c2411e90 100644 --- a/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_fmt -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,9 +50,7 @@ subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) class default call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_d_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 index 8d33c459..a6a39a1c 100644 --- a/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_mv_diag_from_coo -#else - use psb_d_cuda_diag_mat_mod -#endif implicit none diff --git a/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 index ad9e7f10..9886e90c 100644 --- a/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_coo -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,5 +52,4 @@ subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) return - end subroutine psb_d_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 index 9cdf790e..da2d47a1 100644 --- a/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_fmt -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -49,9 +44,7 @@ subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) !locals type(psb_d_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ @@ -65,13 +58,9 @@ subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat call move_alloc(b%irn, a%irn) call move_alloc(b%idiag, a%idiag) @@ -87,9 +76,7 @@ subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) end if a%nzt = nza call b%free() -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default call b%mv_to_coo(tmp,info) diff --git a/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 index aff5e0c0..b3c4f650 100644 --- a/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_mv_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_d_cuda_hdiag_mat_mod -#endif implicit none @@ -54,16 +48,12 @@ subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_d_hdia_sparse_mat%mv_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 index a2b358c4..95e86293 100644 --- a/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_coo -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 index 130d88c2..02578b19 100644 --- a/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_fmt -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 index 8b0ad032..3113fb13 100644 --- a/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_coo -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 index 71badfc5..520792e9 100644 --- a/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_fmt -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -54,9 +50,7 @@ subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) class default call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_d_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 index b7bebc95..0e3f9113 100644 --- a/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 @@ -32,12 +32,8 @@ subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_coo -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -48,10 +44,8 @@ subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 index 7ab9283d..29bbea6e 100644 --- a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 @@ -32,12 +32,8 @@ subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_fmt -#else - use psb_s_cuda_csrg_mat_mod -#endif !use iso_c_binding implicit none @@ -53,9 +49,7 @@ subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) class default call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_s_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 index 9f038a09..07025d77 100644 --- a/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_cp_diag_from_coo -#else - use psb_s_cuda_diag_mat_mod -#endif implicit none class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) info = psb_success_ call a%psb_s_dia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 index f6e1ba42..66abf76a 100644 --- a/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 @@ -28,20 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_coo use psi_ext_util_mod use psb_cuda_env_mod -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,16 +51,11 @@ subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) integer(psb_ipk_), allocatable :: idisp(:) info = psb_success_ -#ifdef HAVE_SPGPU hacksize = max(1,psb_cuda_WarpSize()) -#else - hacksize = 1 -#endif if (b%is_dev()) call b%sync() if (b%is_by_rows()) then -#ifdef HAVE_SPGPU call psi_s_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) @@ -82,15 +71,8 @@ subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & & a%irn,idisp,b%ja,b%val, a%deviceMat) call a%set_dev() -#else - - call psi_s_convert_ell_from_coo(a,b,info,hacksize=hacksize) - call a%set_host() -#endif - else call b%cp_to_coo(tmp,info) -#ifdef HAVE_SPGPU call psi_s_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) @@ -107,11 +89,6 @@ subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) call a%set_dev() -#else - - call psi_s_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) - call a%set_host() -#endif end if if (info /= psb_success_) goto 9999 diff --git a/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 index 0c811426..77df12b3 100644 --- a/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_fmt -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -51,9 +47,7 @@ subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ if (b%is_dev()) call b%sync() @@ -67,13 +61,9 @@ subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) @@ -88,9 +78,7 @@ subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) a%val(1:m,1:nzm) = b%val(1:m,1:nzm) end if a%nzt = nza -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default diff --git a/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 index 07b56fa6..75210478 100644 --- a/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_cp_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_s_cuda_hdiag_mat_mod -#endif implicit none class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a @@ -53,16 +47,12 @@ subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_s_hdia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 index 055fa046..c254b15a 100644 --- a/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_coo -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -61,11 +57,7 @@ subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() -#ifdef HAVE_SPGPU hksz = max(1,psb_cuda_WarpSize()) -#else - hksz = psi_get_hksz() -#endif if (b%is_by_rows()) then diff --git a/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 index b49be761..f04b65e5 100644 --- a/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_fmt -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) call a%cp_from_coo(b,info) class default call a%psb_s_hll_sparse_mat%cp_from_fmt(b,info) -#ifdef HAVE_SPGPU if (info == 0) call a%to_gpu(info) -#endif end select if (info /= 0) goto 9999 diff --git a/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 index ab135944..69aa615b 100644 --- a/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_coo -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -49,10 +45,8 @@ subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 index 62a54759..95e00df1 100644 --- a/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_fmt -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) class default call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_s_cuda_cp_hybg_from_fmt diff --git a/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 index 53ca8f12..7e6f0c86 100644 --- a/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_allocate_mnnz -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) call a%psb_s_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() if (info == 0) call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_csmm.F90 b/cuda/impl/psb_s_cuda_csrg_csmm.F90 index c8ff4a9e..453f5260 100644 --- a/cuda/impl/psb_s_cuda_csrg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmm -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -94,7 +90,6 @@ subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -119,9 +114,6 @@ subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_csmv.F90 b/cuda/impl/psb_s_cuda_csrg_csmv.F90 index 72658c28..a387fd34 100644 --- a/cuda/impl/psb_s_cuda_csrg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmv -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -96,7 +92,6 @@ subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -124,9 +119,6 @@ subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 index 7811f746..adcb2536 100644 --- a/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_csrg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_from_gpu -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +44,6 @@ subroutine psb_s_cuda_csrg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat%mat))) then call a%free() return @@ -68,6 +63,5 @@ subroutine psb_s_cuda_csrg_from_gpu(a,info) #endif call a%set_sync() -#endif end subroutine psb_s_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 index 7f9965d8..df11952c 100644 --- a/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 @@ -32,13 +32,9 @@ subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_inner_vect_sv -#else - use psb_s_cuda_csrg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a @@ -75,7 +71,6 @@ subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -112,12 +107,6 @@ subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='csrg_vect_sv') diff --git a/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 index fed3b0e7..dfd115e7 100644 --- a/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_reallocate_nz -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) ! call a%psb_s_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_scal.F90 b/cuda/impl/psb_s_cuda_csrg_scal.F90 index 826ab2dd..ea3406a1 100644 --- a/cuda/impl/psb_s_cuda_csrg_scal.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scal -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) @@ -58,10 +54,8 @@ subroutine psb_s_cuda_csrg_scal(d,a,info,side) call a%psb_s_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_scals.F90 b/cuda/impl/psb_s_cuda_csrg_scals.F90 index 04f4c29a..307d5849 100644 --- a/cuda/impl/psb_s_cuda_csrg_scals.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scals -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d @@ -56,10 +52,8 @@ subroutine psb_s_cuda_csrg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index eadca5df..cf052e13 100644 --- a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_to_gpu -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -320,6 +315,5 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in CSRG_TO_GPU ',info end if -#endif end subroutine psb_s_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 index 38e2dfc0..52820436 100644 --- a/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_vect_mv -#else - use psb_s_cuda_csrg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a @@ -72,7 +68,6 @@ subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +107,6 @@ subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_diag_csmv.F90 b/cuda/impl/psb_s_cuda_diag_csmv.F90 index 214cf6f8..016b82bc 100644 --- a/cuda/impl/psb_s_cuda_diag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_diag_csmv.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_csmv -#else - use psb_s_cuda_diag_mat_mod -#endif implicit none class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +90,6 @@ subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +116,6 @@ subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +124,4 @@ subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_s_cuda_diag_csmv diff --git a/cuda/impl/psb_s_cuda_diag_to_gpu.F90 b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 index c1ee7401..c8578e75 100644 --- a/cuda/impl/psb_s_cuda_diag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_to_gpu -#else - use psb_s_cuda_diag_mat_mod -#endif use iso_c_binding implicit none class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a @@ -47,13 +43,10 @@ subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d -#ifdef HAVE_SPGPU type(diagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return n = size(a%data,1) @@ -69,6 +62,5 @@ subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) if (info == 0) info = & & writeDiagDevice(a%deviceMat,a%data,a%offset,n) ! if (info /= 0) goto 9999 -#endif end subroutine psb_s_cuda_diag_to_gpu diff --git a/cuda/impl/psb_s_cuda_diag_vect_mv.F90 b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 index ab655b7c..d68e5193 100644 --- a/cuda/impl/psb_s_cuda_diag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_vect_mv -#else - use psb_s_cuda_diag_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_diag_sparse_mat), intent(in) :: a @@ -71,7 +65,6 @@ subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +105,6 @@ subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 index 861724aa..86fec6a7 100644 --- a/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 @@ -32,13 +32,9 @@ subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_vect_mv -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta @@ -123,13 +119,9 @@ end subroutine psb_s_cuda_dnsg_vect_mv subroutine psb_s_cuda_dnsg_mold(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_mold -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b @@ -190,17 +182,12 @@ end subroutine psb_s_cuda_dnsg_mold !!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz !!$ end interface - subroutine psb_s_cuda_dnsg_to_gpu(a,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_to_gpu -#else - use psb_s_cuda_dnsg_mat_mod -#endif class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda @@ -209,15 +196,12 @@ subroutine psb_s_cuda_dnsg_to_gpu(a,info) call psb_erractionsave(err_act) info = psb_success_ -#ifdef HAVE_SPGPU if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& & spgpu_type_float,1) if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info - -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return @@ -233,13 +217,9 @@ end subroutine psb_s_cuda_dnsg_to_gpu subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_coo -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -272,13 +252,9 @@ end subroutine psb_s_cuda_cp_dnsg_from_coo subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_fmt -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -348,13 +324,9 @@ end subroutine psb_s_cuda_cp_dnsg_from_fmt subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_coo -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -383,18 +355,13 @@ subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) return end subroutine psb_s_cuda_mv_dnsg_from_coo - - + subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_fmt -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b diff --git a/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 index 63c41644..b771ca1b 100644 --- a/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_allocate_mnnz -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -47,9 +41,7 @@ subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ @@ -74,13 +66,9 @@ subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) goto 9999 endif -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_float,1) ld = gpu_parms%pitch nz_ = gpu_parms%maxRowSize -#else - ld = m -#endif if (info == psb_success_) call psb_realloc(m,a%irn,info) if (info == psb_success_) call psb_realloc(m,a%idiag,info) @@ -98,10 +86,8 @@ subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) call a%set_dupl(psb_dupl_def_) end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_asb.f90 b/cuda/impl/psb_s_cuda_elg_asb.f90 index 0d53c26a..53a17a32 100644 --- a/cuda/impl/psb_s_cuda_elg_asb.f90 +++ b/cuda/impl/psb_s_cuda_elg_asb.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_asb(a) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_elg_csmm.F90 b/cuda/impl/psb_s_cuda_elg_csmm.F90 index e7f88a2e..ff7b7848 100644 --- a/cuda/impl/psb_s_cuda_elg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmm -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +87,6 @@ subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -119,9 +112,6 @@ subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_csmv.F90 b/cuda/impl/psb_s_cuda_elg_csmv.F90 index 1844d338..caf106cb 100644 --- a/cuda/impl/psb_s_cuda_elg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmv -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -122,9 +116,6 @@ subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_csput.F90 b/cuda/impl/psb_s_cuda_elg_csput.F90 index 036eabb2..d43294bf 100644 --- a/cuda/impl/psb_s_cuda_elg_csput.F90 +++ b/cuda/impl/psb_s_cuda_elg_csput.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_a -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -128,13 +123,9 @@ subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_v use psb_s_cuda_vect_mod -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_s_cuda_elg_from_gpu.F90 b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 index bdc55790..d995157e 100644 --- a/cuda/impl/psb_s_cuda_elg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_s_cuda_elg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_from_gpu -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_s_cuda_elg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat))) then call a%free() return @@ -69,6 +63,5 @@ subroutine psb_s_cuda_elg_from_gpu(a,info) if (info == 0) info = & & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_s_cuda_elg_from_gpu diff --git a/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 index 79e546f5..537365a6 100644 --- a/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 @@ -27,19 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! - +! subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_inner_vect_sv -#else - use psb_s_cuda_elg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_elg_sparse_mat), intent(in) :: a diff --git a/cuda/impl/psb_s_cuda_elg_mold.F90 b/cuda/impl/psb_s_cuda_elg_mold.F90 index dc8730bb..7ff9c7ae 100644 --- a/cuda/impl/psb_s_cuda_elg_mold.F90 +++ b/cuda/impl/psb_s_cuda_elg_mold.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_elg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 index 3f34fcec..5b99b9ad 100644 --- a/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_reallocate_nz -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -64,10 +59,8 @@ subroutine psb_s_cuda_elg_reallocate_nz(nz,a) goto 9999 end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nzrm) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_scal.F90 b/cuda/impl/psb_s_cuda_elg_scal.F90 index cd6e1a5b..dfa99cf1 100644 --- a/cuda/impl/psb_s_cuda_elg_scal.F90 +++ b/cuda/impl/psb_s_cuda_elg_scal.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scal -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) @@ -63,10 +58,8 @@ subroutine psb_s_cuda_elg_scal(d,a,info,side) call a%psb_s_ell_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_scals.F90 b/cuda/impl/psb_s_cuda_elg_scals.F90 index 4ee8a64d..f0aa2504 100644 --- a/cuda/impl/psb_s_cuda_elg_scals.F90 +++ b/cuda/impl/psb_s_cuda_elg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scals -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d @@ -59,10 +54,8 @@ subroutine psb_s_cuda_elg_scals(d,a,info) a%val(:,:) = a%val(:,:) * d -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_elg_to_gpu.F90 b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 index 7d04d2b0..9c16ea8d 100644 --- a/cuda/impl/psb_s_cuda_elg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 @@ -29,30 +29,22 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_to_gpu -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -88,6 +80,5 @@ subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm) if (info == 0) info = & & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_s_cuda_elg_to_gpu diff --git a/cuda/impl/psb_s_cuda_elg_trim.f90 b/cuda/impl/psb_s_cuda_elg_trim.f90 index 516aebc4..2d390343 100644 --- a/cuda/impl/psb_s_cuda_elg_trim.f90 +++ b/cuda/impl/psb_s_cuda_elg_trim.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_trim(a) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_elg_vect_mv.F90 b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 index dad62418..6c898fda 100644 --- a/cuda/impl/psb_s_cuda_elg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_vect_mv -#else - use psb_s_cuda_elg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_elg_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() if (.not.x%is_host()) call x%sync() @@ -116,10 +110,6 @@ subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - if (a%is_dev()) call a%sync() - call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hdiag_csmv.F90 b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 index 8e7e4931..3f34c2e7 100644 --- a/cuda/impl/psb_s_cuda_hdiag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_csmv -#else - use psb_s_cuda_hdiag_mat_mod -#endif implicit none class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +123,4 @@ subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_s_cuda_hdiag_csmv diff --git a/cuda/impl/psb_s_cuda_hdiag_mold.F90 b/cuda/impl/psb_s_cuda_hdiag_mold.F90 index e662b07b..c11283dd 100644 --- a/cuda/impl/psb_s_cuda_hdiag_mold.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hdiag_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 index 5fe493aa..bc3fa325 100644 --- a/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 @@ -29,29 +29,21 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hdiag_to_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_to_gpu -#else - use psb_s_cuda_hdiag_mat_mod -#endif use iso_c_binding implicit none class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight -#ifdef HAVE_SPGPU type(hdiagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU nr = a%get_nrows() nc = a%get_ncols() hacksize = a%hackSize @@ -81,6 +73,4 @@ subroutine psb_s_cuda_hdiag_to_gpu(a,info) if (info == 0) info = & & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) -#endif - end subroutine psb_s_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 index 3496a637..03215047 100644 --- a/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_vect_mv -#else - use psb_s_cuda_hdiag_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 index 3f2765c4..480f6677 100644 --- a/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_allocate_mnnz -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -47,19 +42,15 @@ subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(hlldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ call a%psb_s_hll_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_csmm.F90 b/cuda/impl/psb_s_cuda_hlg_csmm.F90 index 2e274c22..0dc28c7f 100644 --- a/cuda/impl/psb_s_cuda_hlg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmm -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -93,7 +88,6 @@ subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -118,9 +112,6 @@ subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -128,5 +119,4 @@ subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) return - end subroutine psb_s_cuda_hlg_csmm diff --git a/cuda/impl/psb_s_cuda_hlg_csmv.F90 b/cuda/impl/psb_s_cuda_hlg_csmv.F90 index 56ea8cdb..c029c908 100644 --- a/cuda/impl/psb_s_cuda_hlg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmv -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 index 14ab19b7..8d9a315b 100644 --- a/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_from_gpu -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_s_cuda_hlg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (a%is_sync()) return if (a%is_host()) return if (.not.(c_associated(a%deviceMat))) then @@ -71,6 +65,5 @@ subroutine psb_s_cuda_hlg_from_gpu(a,info) if (info == 0) info = & & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_s_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 index a9f4f743..2985a1ab 100644 --- a/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_inner_vect_sv -#else - use psb_s_cuda_hlg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,11 +64,9 @@ subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) goto 9999 end if - call psb_erractionrestore(err_act) return - 9999 call psb_error_handler(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_mold.F90 b/cuda/impl/psb_s_cuda_hlg_mold.F90 index 90e9cebf..89e329e7 100644 --- a/cuda/impl/psb_s_cuda_hlg_mold.F90 +++ b/cuda/impl/psb_s_cuda_hlg_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 index d5b9333c..03742958 100644 --- a/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_reallocate_nz -#else - use psb_s_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz @@ -52,10 +47,8 @@ subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) call a%psb_s_hll_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_scal.F90 b/cuda/impl/psb_s_cuda_hlg_scal.F90 index e803a63d..7074b8b6 100644 --- a/cuda/impl/psb_s_cuda_hlg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scal.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scal -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) @@ -60,10 +55,8 @@ subroutine psb_s_cuda_hlg_scal(d,a,info,side) call a%psb_s_hll_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_scals.F90 b/cuda/impl/psb_s_cuda_hlg_scals.F90 index eec592e1..2c9f5ae8 100644 --- a/cuda/impl/psb_s_cuda_hlg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scals -#else - use psb_s_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -59,10 +54,8 @@ subroutine psb_s_cuda_hlg_scals(d,a,info) call a%psb_s_hll_sparse_mat%scal(d,info) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 index 14a2a629..91cfd5ad 100644 --- a/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_to_gpu -#else - use psb_s_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -50,7 +45,6 @@ subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return n = a%get_nrows() @@ -63,6 +57,5 @@ subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) if (info == 0) info = & & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) ! if (info /= 0) goto 9999 -#endif end subroutine psb_s_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 index 2b964f91..94696949 100644 --- a/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_vect_mv -#else - use psb_s_cuda_hlg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,9 +64,7 @@ subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) goto 9999 endif - tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -115,9 +108,6 @@ subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 index 0cf1e2bc..d4649d4c 100644 --- a/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_allocate_mnnz -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) call a%psb_s_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_csmm.F90 b/cuda/impl/psb_s_cuda_hybg_csmm.F90 index f89df384..f321ce8b 100644 --- a/cuda/impl/psb_s_cuda_hybg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmm -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +88,6 @@ subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_csmv.F90 b/cuda/impl/psb_s_cuda_hybg_csmv.F90 index 01642146..5f1a8c90 100644 --- a/cuda/impl/psb_s_cuda_hybg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmv -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -95,7 +91,6 @@ subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -123,9 +118,6 @@ subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 index f0006f5c..722505ff 100644 --- a/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_inner_vect_sv -#else - use psb_s_cuda_hybg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a @@ -76,7 +72,6 @@ subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=szero)) then call x%sync() call y%sync() @@ -113,12 +108,6 @@ subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='hybg_vect_sv') diff --git a/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 index 7ee15f52..7311ed64 100644 --- a/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_reallocate_nz -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) ! call a%psb_s_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_scal.F90 b/cuda/impl/psb_s_cuda_hybg_scal.F90 index 7a3978b7..d6f8eb8d 100644 --- a/cuda/impl/psb_s_cuda_hybg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scal -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) @@ -60,10 +56,8 @@ subroutine psb_s_cuda_hybg_scal(d,a,info,side) call a%psb_s_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_scals.F90 b/cuda/impl/psb_s_cuda_hybg_scals.F90 index a19ae3f6..f80d8efe 100644 --- a/cuda/impl/psb_s_cuda_hybg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scals -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d @@ -60,10 +56,8 @@ subroutine psb_s_cuda_hybg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 index ec415176..168a0981 100644 --- a/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_to_gpu -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -148,7 +143,6 @@ subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in HYBG_TO_GPU ',info end if -#endif end subroutine psb_s_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 index a83c4561..6273f31a 100644 --- a/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_vect_mv -#else - use psb_s_cuda_hybg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a @@ -71,8 +67,6 @@ subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') - -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +106,6 @@ subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 index a9e297bd..b61e94d6 100644 --- a/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_coo -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -51,9 +46,7 @@ subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif if (info /= 0) goto 9999 return diff --git a/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 index 54bc0ae4..52643a10 100644 --- a/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_fmt -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,9 +50,7 @@ subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) class default call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_s_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 index fda60d96..1b2fe8a1 100644 --- a/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_mv_diag_from_coo -#else - use psb_s_cuda_diag_mat_mod -#endif implicit none diff --git a/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 index 447e2971..e8dbbabf 100644 --- a/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_coo -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,5 +52,4 @@ subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) return - end subroutine psb_s_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 index e88080dd..21d9a339 100644 --- a/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_fmt -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -49,9 +44,7 @@ subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) !locals type(psb_s_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ @@ -65,13 +58,9 @@ subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat call move_alloc(b%irn, a%irn) call move_alloc(b%idiag, a%idiag) @@ -87,9 +76,7 @@ subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) end if a%nzt = nza call b%free() -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default call b%mv_to_coo(tmp,info) diff --git a/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 index f3252eb2..b0370ebb 100644 --- a/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_mv_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_s_cuda_hdiag_mat_mod -#endif implicit none @@ -54,16 +48,12 @@ subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_s_hdia_sparse_mat%mv_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 index 9810a85e..4c8aab71 100644 --- a/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_coo -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 index 700dc151..a162e2aa 100644 --- a/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_fmt -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 index ca9f34c1..ddae4546 100644 --- a/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_coo -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 index 5ba606af..c6250104 100644 --- a/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_fmt -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -54,9 +50,7 @@ subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) class default call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_s_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 index 186190ac..90f3fb4f 100644 --- a/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 @@ -32,12 +32,8 @@ subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_coo -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -48,10 +44,8 @@ subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 index d1e1a82d..26490a15 100644 --- a/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 @@ -32,12 +32,8 @@ subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_fmt -#else - use psb_z_cuda_csrg_mat_mod -#endif !use iso_c_binding implicit none @@ -53,9 +49,7 @@ subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) class default call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_z_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 index c303b127..34706502 100644 --- a/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_cp_diag_from_coo -#else - use psb_z_cuda_diag_mat_mod -#endif implicit none class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) info = psb_success_ call a%psb_z_dia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 index 4b18b89b..e8553cbf 100644 --- a/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 @@ -28,20 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_coo use psi_ext_util_mod use psb_cuda_env_mod -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,16 +51,11 @@ subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) integer(psb_ipk_), allocatable :: idisp(:) info = psb_success_ -#ifdef HAVE_SPGPU hacksize = max(1,psb_cuda_WarpSize()) -#else - hacksize = 1 -#endif if (b%is_dev()) call b%sync() if (b%is_by_rows()) then -#ifdef HAVE_SPGPU call psi_z_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) @@ -82,15 +71,8 @@ subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & & a%irn,idisp,b%ja,b%val, a%deviceMat) call a%set_dev() -#else - - call psi_z_convert_ell_from_coo(a,b,info,hacksize=hacksize) - call a%set_host() -#endif - else call b%cp_to_coo(tmp,info) -#ifdef HAVE_SPGPU call psi_z_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) @@ -107,11 +89,6 @@ subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) call a%set_dev() -#else - - call psi_z_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) - call a%set_host() -#endif end if if (info /= psb_success_) goto 9999 diff --git a/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 index 6fa91de6..85066cef 100644 --- a/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_fmt -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -51,9 +47,7 @@ subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ if (b%is_dev()) call b%sync() @@ -67,13 +61,9 @@ subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) @@ -88,9 +78,7 @@ subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) a%val(1:m,1:nzm) = b%val(1:m,1:nzm) end if a%nzt = nza -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default diff --git a/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 index c94d8824..36013faa 100644 --- a/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_cp_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_z_cuda_hdiag_mat_mod -#endif implicit none class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a @@ -53,16 +47,12 @@ subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_z_hdia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 index 1607f1b6..f7be0835 100644 --- a/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_coo -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -61,11 +57,7 @@ subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() -#ifdef HAVE_SPGPU hksz = max(1,psb_cuda_WarpSize()) -#else - hksz = psi_get_hksz() -#endif if (b%is_by_rows()) then diff --git a/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 index e8c1f95d..253a034f 100644 --- a/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_fmt -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) call a%cp_from_coo(b,info) class default call a%psb_z_hll_sparse_mat%cp_from_fmt(b,info) -#ifdef HAVE_SPGPU if (info == 0) call a%to_gpu(info) -#endif end select if (info /= 0) goto 9999 diff --git a/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 index 6031526a..a6491d68 100644 --- a/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_coo -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -49,10 +45,8 @@ subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 index 0202ef24..748fadca 100644 --- a/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_fmt -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) class default call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_z_cuda_cp_hybg_from_fmt diff --git a/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 index a7533e59..a7988dd3 100644 --- a/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_allocate_mnnz -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) call a%psb_z_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() if (info == 0) call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_csmm.F90 b/cuda/impl/psb_z_cuda_csrg_csmm.F90 index 49fb9fcf..731b7417 100644 --- a/cuda/impl/psb_z_cuda_csrg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmm -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -94,7 +90,6 @@ subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -119,9 +114,6 @@ subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_csmv.F90 b/cuda/impl/psb_z_cuda_csrg_csmv.F90 index 54ad6f4f..f6b01e7e 100644 --- a/cuda/impl/psb_z_cuda_csrg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmv -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -96,7 +92,6 @@ subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -124,9 +119,6 @@ subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 index bb3b49d5..78db4435 100644 --- a/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_csrg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_from_gpu -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +44,6 @@ subroutine psb_z_cuda_csrg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat%mat))) then call a%free() return @@ -68,6 +63,5 @@ subroutine psb_z_cuda_csrg_from_gpu(a,info) #endif call a%set_sync() -#endif end subroutine psb_z_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 index 6c7b1fcb..9a3f8281 100644 --- a/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 @@ -32,13 +32,9 @@ subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_inner_vect_sv -#else - use psb_z_cuda_csrg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a @@ -75,7 +71,6 @@ subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -112,12 +107,6 @@ subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='csrg_vect_sv') diff --git a/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 index 61ae0f59..964cd84e 100644 --- a/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_reallocate_nz -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) ! call a%psb_z_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_scal.F90 b/cuda/impl/psb_z_cuda_csrg_scal.F90 index a2099933..9d97433e 100644 --- a/cuda/impl/psb_z_cuda_csrg_scal.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scal -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) @@ -58,10 +54,8 @@ subroutine psb_z_cuda_csrg_scal(d,a,info,side) call a%psb_z_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_scals.F90 b/cuda/impl/psb_z_cuda_csrg_scals.F90 index 72fee99b..1479ea3a 100644 --- a/cuda/impl/psb_z_cuda_csrg_scals.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scals -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d @@ -56,10 +52,8 @@ subroutine psb_z_cuda_csrg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index c6f217ab..f7e65627 100644 --- a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_to_gpu -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -320,6 +315,5 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in CSRG_TO_GPU ',info end if -#endif end subroutine psb_z_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 index 964134fb..977d7ff9 100644 --- a/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_vect_mv -#else - use psb_z_cuda_csrg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a @@ -72,7 +68,6 @@ subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= zzero) then @@ -112,9 +107,6 @@ subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_diag_csmv.F90 b/cuda/impl/psb_z_cuda_diag_csmv.F90 index 2e86f0f8..fde7147e 100644 --- a/cuda/impl/psb_z_cuda_diag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_diag_csmv.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_csmv -#else - use psb_z_cuda_diag_mat_mod -#endif implicit none class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +90,6 @@ subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +116,6 @@ subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +124,4 @@ subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_z_cuda_diag_csmv diff --git a/cuda/impl/psb_z_cuda_diag_to_gpu.F90 b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 index a28858b5..672ce938 100644 --- a/cuda/impl/psb_z_cuda_diag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_to_gpu -#else - use psb_z_cuda_diag_mat_mod -#endif use iso_c_binding implicit none class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a @@ -47,13 +43,10 @@ subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d -#ifdef HAVE_SPGPU type(diagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return n = size(a%data,1) @@ -69,6 +62,5 @@ subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) if (info == 0) info = & & writeDiagDevice(a%deviceMat,a%data,a%offset,n) ! if (info /= 0) goto 9999 -#endif end subroutine psb_z_cuda_diag_to_gpu diff --git a/cuda/impl/psb_z_cuda_diag_vect_mv.F90 b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 index 12f3c3e7..c6d11f04 100644 --- a/cuda/impl/psb_z_cuda_diag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_vect_mv -#else - use psb_z_cuda_diag_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_diag_sparse_mat), intent(in) :: a @@ -71,7 +65,6 @@ subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +105,6 @@ subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 index c2a641b6..c79a7e8c 100644 --- a/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 @@ -32,13 +32,9 @@ subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_vect_mv -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta @@ -123,13 +119,9 @@ end subroutine psb_z_cuda_dnsg_vect_mv subroutine psb_z_cuda_dnsg_mold(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_mold -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b @@ -190,17 +182,12 @@ end subroutine psb_z_cuda_dnsg_mold !!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz !!$ end interface - subroutine psb_z_cuda_dnsg_to_gpu(a,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_to_gpu -#else - use psb_z_cuda_dnsg_mat_mod -#endif class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda @@ -209,15 +196,12 @@ subroutine psb_z_cuda_dnsg_to_gpu(a,info) call psb_erractionsave(err_act) info = psb_success_ -#ifdef HAVE_SPGPU if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& & spgpu_type_complex_double,1) if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info - -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return @@ -233,13 +217,9 @@ end subroutine psb_z_cuda_dnsg_to_gpu subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_coo -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -272,13 +252,9 @@ end subroutine psb_z_cuda_cp_dnsg_from_coo subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_fmt -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -348,13 +324,9 @@ end subroutine psb_z_cuda_cp_dnsg_from_fmt subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_coo -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -383,18 +355,13 @@ subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) return end subroutine psb_z_cuda_mv_dnsg_from_coo - - + subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_fmt -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b diff --git a/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 index a2e36feb..5c54d00b 100644 --- a/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_allocate_mnnz -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -47,9 +41,7 @@ subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ @@ -74,13 +66,9 @@ subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) goto 9999 endif -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_double,1) ld = gpu_parms%pitch nz_ = gpu_parms%maxRowSize -#else - ld = m -#endif if (info == psb_success_) call psb_realloc(m,a%irn,info) if (info == psb_success_) call psb_realloc(m,a%idiag,info) @@ -98,10 +86,8 @@ subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) call a%set_dupl(psb_dupl_def_) end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_asb.f90 b/cuda/impl/psb_z_cuda_elg_asb.f90 index 511183f5..65b58425 100644 --- a/cuda/impl/psb_z_cuda_elg_asb.f90 +++ b/cuda/impl/psb_z_cuda_elg_asb.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_asb(a) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_elg_csmm.F90 b/cuda/impl/psb_z_cuda_elg_csmm.F90 index d4034b65..4414f0e6 100644 --- a/cuda/impl/psb_z_cuda_elg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmm -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +87,6 @@ subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -119,9 +112,6 @@ subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_csmv.F90 b/cuda/impl/psb_z_cuda_elg_csmv.F90 index eba12d16..060147b9 100644 --- a/cuda/impl/psb_z_cuda_elg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmv -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -122,9 +116,6 @@ subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_csput.F90 b/cuda/impl/psb_z_cuda_elg_csput.F90 index 5a52f4f8..a9eb7c43 100644 --- a/cuda/impl/psb_z_cuda_elg_csput.F90 +++ b/cuda/impl/psb_z_cuda_elg_csput.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_a -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -128,13 +123,9 @@ subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_v use psb_z_cuda_vect_mod -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_z_cuda_elg_from_gpu.F90 b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 index ffed4349..b1291ab2 100644 --- a/cuda/impl/psb_z_cuda_elg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_z_cuda_elg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_from_gpu -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_z_cuda_elg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat))) then call a%free() return @@ -69,6 +63,5 @@ subroutine psb_z_cuda_elg_from_gpu(a,info) if (info == 0) info = & & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_z_cuda_elg_from_gpu diff --git a/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 index 7564d5dd..443e7cbb 100644 --- a/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 @@ -27,19 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! - +! subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_inner_vect_sv -#else - use psb_z_cuda_elg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_elg_sparse_mat), intent(in) :: a diff --git a/cuda/impl/psb_z_cuda_elg_mold.F90 b/cuda/impl/psb_z_cuda_elg_mold.F90 index e027c9f2..e9a3891d 100644 --- a/cuda/impl/psb_z_cuda_elg_mold.F90 +++ b/cuda/impl/psb_z_cuda_elg_mold.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_elg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 index 16cebe70..3a8c2760 100644 --- a/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_reallocate_nz -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -64,10 +59,8 @@ subroutine psb_z_cuda_elg_reallocate_nz(nz,a) goto 9999 end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nzrm) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_scal.F90 b/cuda/impl/psb_z_cuda_elg_scal.F90 index 4802aaaa..a9846362 100644 --- a/cuda/impl/psb_z_cuda_elg_scal.F90 +++ b/cuda/impl/psb_z_cuda_elg_scal.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scal -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) @@ -63,10 +58,8 @@ subroutine psb_z_cuda_elg_scal(d,a,info,side) call a%psb_z_ell_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_scals.F90 b/cuda/impl/psb_z_cuda_elg_scals.F90 index 5db823da..b4462589 100644 --- a/cuda/impl/psb_z_cuda_elg_scals.F90 +++ b/cuda/impl/psb_z_cuda_elg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scals -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d @@ -59,10 +54,8 @@ subroutine psb_z_cuda_elg_scals(d,a,info) a%val(:,:) = a%val(:,:) * d -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_elg_to_gpu.F90 b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 index 6d86bdd9..3a0ecd14 100644 --- a/cuda/impl/psb_z_cuda_elg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 @@ -29,30 +29,22 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_to_gpu -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -88,6 +80,5 @@ subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm) if (info == 0) info = & & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_z_cuda_elg_to_gpu diff --git a/cuda/impl/psb_z_cuda_elg_trim.f90 b/cuda/impl/psb_z_cuda_elg_trim.f90 index 3d261150..98f92efe 100644 --- a/cuda/impl/psb_z_cuda_elg_trim.f90 +++ b/cuda/impl/psb_z_cuda_elg_trim.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_trim(a) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_elg_vect_mv.F90 b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 index 4bd1b3ed..1b1a0720 100644 --- a/cuda/impl/psb_z_cuda_elg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_vect_mv -#else - use psb_z_cuda_elg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_elg_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() if (.not.x%is_host()) call x%sync() @@ -116,10 +110,6 @@ subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - if (a%is_dev()) call a%sync() - call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hdiag_csmv.F90 b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 index 8be14704..187655a8 100644 --- a/cuda/impl/psb_z_cuda_hdiag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_csmv -#else - use psb_z_cuda_hdiag_mat_mod -#endif implicit none class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +123,4 @@ subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_z_cuda_hdiag_csmv diff --git a/cuda/impl/psb_z_cuda_hdiag_mold.F90 b/cuda/impl/psb_z_cuda_hdiag_mold.F90 index 33fdd8eb..7b86b3a3 100644 --- a/cuda/impl/psb_z_cuda_hdiag_mold.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hdiag_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 index 47126aca..a1140961 100644 --- a/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 @@ -29,29 +29,21 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hdiag_to_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_to_gpu -#else - use psb_z_cuda_hdiag_mat_mod -#endif use iso_c_binding implicit none class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight -#ifdef HAVE_SPGPU type(hdiagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU nr = a%get_nrows() nc = a%get_ncols() hacksize = a%hackSize @@ -81,6 +73,4 @@ subroutine psb_z_cuda_hdiag_to_gpu(a,info) if (info == 0) info = & & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) -#endif - end subroutine psb_z_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 index cf0b3457..aef5628c 100644 --- a/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_vect_mv -#else - use psb_z_cuda_hdiag_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 index 228244f1..f8566661 100644 --- a/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_allocate_mnnz -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -47,19 +42,15 @@ subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(hlldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ call a%psb_z_hll_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_csmm.F90 b/cuda/impl/psb_z_cuda_hlg_csmm.F90 index 325ab0d0..8eb30ef9 100644 --- a/cuda/impl/psb_z_cuda_hlg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmm -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -93,7 +88,6 @@ subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -118,9 +112,6 @@ subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -128,5 +119,4 @@ subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) return - end subroutine psb_z_cuda_hlg_csmm diff --git a/cuda/impl/psb_z_cuda_hlg_csmv.F90 b/cuda/impl/psb_z_cuda_hlg_csmv.F90 index ac84190e..1a807016 100644 --- a/cuda/impl/psb_z_cuda_hlg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmv -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 index 4db6c3ce..8260ecd7 100644 --- a/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_from_gpu -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_z_cuda_hlg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (a%is_sync()) return if (a%is_host()) return if (.not.(c_associated(a%deviceMat))) then @@ -71,6 +65,5 @@ subroutine psb_z_cuda_hlg_from_gpu(a,info) if (info == 0) info = & & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_z_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 index f99a5a9e..6a914fc0 100644 --- a/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_inner_vect_sv -#else - use psb_z_cuda_hlg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,11 +64,9 @@ subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) goto 9999 end if - call psb_erractionrestore(err_act) return - 9999 call psb_error_handler(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_mold.F90 b/cuda/impl/psb_z_cuda_hlg_mold.F90 index cc9ad510..bc631ece 100644 --- a/cuda/impl/psb_z_cuda_hlg_mold.F90 +++ b/cuda/impl/psb_z_cuda_hlg_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 index aaba9be5..ac3fbbd1 100644 --- a/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_reallocate_nz -#else - use psb_z_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz @@ -52,10 +47,8 @@ subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) call a%psb_z_hll_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_scal.F90 b/cuda/impl/psb_z_cuda_hlg_scal.F90 index 3ffda36a..7b9df998 100644 --- a/cuda/impl/psb_z_cuda_hlg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scal.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scal -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) @@ -60,10 +55,8 @@ subroutine psb_z_cuda_hlg_scal(d,a,info,side) call a%psb_z_hll_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_scals.F90 b/cuda/impl/psb_z_cuda_hlg_scals.F90 index bae50c7c..b867e3b8 100644 --- a/cuda/impl/psb_z_cuda_hlg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scals -#else - use psb_z_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -59,10 +54,8 @@ subroutine psb_z_cuda_hlg_scals(d,a,info) call a%psb_z_hll_sparse_mat%scal(d,info) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 index 93c9f043..8f81842a 100644 --- a/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_to_gpu -#else - use psb_z_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -50,7 +45,6 @@ subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return n = a%get_nrows() @@ -63,6 +57,5 @@ subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) if (info == 0) info = & & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) ! if (info /= 0) goto 9999 -#endif end subroutine psb_z_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 index f377efec..e2e93b85 100644 --- a/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_vect_mv -#else - use psb_z_cuda_hlg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,9 +64,7 @@ subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) goto 9999 endif - tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= zzero) then @@ -115,9 +108,6 @@ subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 index 0c6f9aa9..39e10134 100644 --- a/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_allocate_mnnz -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) call a%psb_z_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_csmm.F90 b/cuda/impl/psb_z_cuda_hybg_csmm.F90 index d4a32420..bcd1874a 100644 --- a/cuda/impl/psb_z_cuda_hybg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmm -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +88,6 @@ subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_csmv.F90 b/cuda/impl/psb_z_cuda_hybg_csmv.F90 index 180a8ae1..d7c5cab6 100644 --- a/cuda/impl/psb_z_cuda_hybg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmv -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -95,7 +91,6 @@ subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -123,9 +118,6 @@ subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 index 1df47788..36fa3890 100644 --- a/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_inner_vect_sv -#else - use psb_z_cuda_hybg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a @@ -76,7 +72,6 @@ subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=zzero)) then call x%sync() call y%sync() @@ -113,12 +108,6 @@ subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='hybg_vect_sv') diff --git a/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 index 5278ba35..f295b332 100644 --- a/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_reallocate_nz -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) ! call a%psb_z_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_scal.F90 b/cuda/impl/psb_z_cuda_hybg_scal.F90 index cd436e76..40b9d6b0 100644 --- a/cuda/impl/psb_z_cuda_hybg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scal -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) @@ -60,10 +56,8 @@ subroutine psb_z_cuda_hybg_scal(d,a,info,side) call a%psb_z_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_scals.F90 b/cuda/impl/psb_z_cuda_hybg_scals.F90 index 0a9ee79d..bd7b9224 100644 --- a/cuda/impl/psb_z_cuda_hybg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scals -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d @@ -60,10 +56,8 @@ subroutine psb_z_cuda_hybg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 index 107b5049..f3a32c81 100644 --- a/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_to_gpu -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -148,7 +143,6 @@ subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in HYBG_TO_GPU ',info end if -#endif end subroutine psb_z_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 index 22751f2d..0cb1db22 100644 --- a/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_vect_mv -#else - use psb_z_cuda_hybg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a @@ -71,8 +67,6 @@ subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') - -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= zzero) then @@ -112,9 +106,6 @@ subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 index d5390ee3..7cfd4f19 100644 --- a/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_coo -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -51,9 +46,7 @@ subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif if (info /= 0) goto 9999 return diff --git a/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 index e2bfdb73..f03294c8 100644 --- a/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_fmt -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,9 +50,7 @@ subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) class default call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_z_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 index b61813bf..8e702e7e 100644 --- a/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_mv_diag_from_coo -#else - use psb_z_cuda_diag_mat_mod -#endif implicit none diff --git a/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 index e3ff4036..f0cb23f3 100644 --- a/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_coo -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,5 +52,4 @@ subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) return - end subroutine psb_z_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 index 07a80173..29f63423 100644 --- a/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_fmt -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -49,9 +44,7 @@ subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) !locals type(psb_z_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ @@ -65,13 +58,9 @@ subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat call move_alloc(b%irn, a%irn) call move_alloc(b%idiag, a%idiag) @@ -87,9 +76,7 @@ subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) end if a%nzt = nza call b%free() -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default call b%mv_to_coo(tmp,info) diff --git a/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 index f25e6370..dd6dae1e 100644 --- a/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_mv_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_z_cuda_hdiag_mat_mod -#endif implicit none @@ -54,16 +48,12 @@ subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_z_hdia_sparse_mat%mv_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 index 3bc630de..609680b9 100644 --- a/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_coo -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 index d746a341..e67c8d83 100644 --- a/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_fmt -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 index 7d0d9eec..69fda3d6 100644 --- a/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_coo -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 index 7bfc27e3..55c33a09 100644 --- a/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_fmt -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -54,9 +50,7 @@ subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) class default call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_z_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/ivectordev.c b/cuda/ivectordev.c index 71d5c472..241f1115 100644 --- a/cuda/ivectordev.c +++ b/cuda/ivectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "ivectordev.h" @@ -177,6 +176,3 @@ int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void return SPGPU_SUCCESS; } - -#endif - diff --git a/cuda/ivectordev.h b/cuda/ivectordev.h index 5f7ca974..2db54be4 100644 --- a/cuda/ivectordev.h +++ b/cuda/ivectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include "vectordev.h" #include "cuda_runtime.h" @@ -60,5 +59,3 @@ int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first int indexBase, int beta); int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, int hfirst, void* host_values, int indexBase, int beta); - -#endif diff --git a/cuda/psb_base_vectordev_mod.F90 b/cuda/psb_base_vectordev_mod.F90 index f8c303d0..da02b2f3 100644 --- a/cuda/psb_base_vectordev_mod.F90 +++ b/cuda/psb_base_vectordev_mod.F90 @@ -40,9 +40,6 @@ module psb_base_vectordev_mod integer(c_int) :: pitch integer(c_int) :: size end type multivec_dev_parms - -#ifdef HAVE_SPGPU - interface function FallocMultiVecDevice(deviceVec,count,Size,elementType) & @@ -54,7 +51,6 @@ module psb_base_vectordev_mod end function FallocMultiVecDevice end interface - interface subroutine unregisterMapped(buf) & & bind(c,name='unregisterMapped') @@ -98,7 +94,4 @@ module psb_base_vectordev_mod end function getMultiVecDevicePitch end interface -#endif - - end module psb_base_vectordev_mod diff --git a/cuda/psb_c_cuda_csrg_mat_mod.F90 b/cuda/psb_c_cuda_csrg_mat_mod.F90 index a98d7e99..1fdeec4a 100644 --- a/cuda/psb_c_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_c_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_c_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(c_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_c_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_c_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_c_cuda_csrg_from_gpu final :: c_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_csrg_mold -#endif end type psb_c_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_csrg_get_nzeros, c_cuda_csrg_free, c_cuda_csrg_get_fmt, & & c_cuda_csrg_get_size, c_cuda_csrg_sizeof, c_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine c_cuda_csrg_finalize -#else - interface - subroutine psb_c_cuda_csrg_mold(a,b,info) - import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_csrg_mold - end interface - -#endif - end module psb_c_cuda_csrg_mat_mod diff --git a/cuda/psb_c_cuda_diag_mat_mod.F90 b/cuda/psb_c_cuda_diag_mat_mod.F90 index 1d5db05b..93ffe498 100644 --- a/cuda/psb_c_cuda_diag_mat_mod.F90 +++ b/cuda/psb_c_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_c_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_c_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_c_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_c_cuda_diag_to_gpu final :: c_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_diag_mold -#endif end type psb_c_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_diag_get_nzeros, c_cuda_diag_free, c_cuda_diag_get_fmt, & & c_cuda_diag_get_size, c_cuda_diag_sizeof, c_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine c_cuda_diag_finalize -#else - - interface - subroutine psb_c_cuda_diag_mold(a,b,info) - import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_diag_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_diag_mold - end interface - -#endif - end module psb_c_cuda_diag_mat_mod diff --git a/cuda/psb_c_cuda_dnsg_mat_mod.F90 b/cuda/psb_c_cuda_dnsg_mat_mod.F90 index e89e117b..b0ca8c46 100644 --- a/cuda/psb_c_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_c_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_c_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_c_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_c_cuda_dnsg_to_gpu final :: c_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold -#endif end type psb_c_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_dnsg_get_nzeros, c_cuda_dnsg_free, c_cuda_dnsg_get_fmt, & & c_cuda_dnsg_get_size, c_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine c_cuda_dnsg_finalize -#else - - interface - subroutine psb_c_cuda_dnsg_mold(a,b,info) - import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_dnsg_mold - end interface - -#endif - end module psb_c_cuda_dnsg_mat_mod diff --git a/cuda/psb_c_cuda_elg_mat_mod.F90 b/cuda/psb_c_cuda_elg_mat_mod.F90 index 43250ce3..c9b48005 100644 --- a/cuda/psb_c_cuda_elg_mat_mod.F90 +++ b/cuda/psb_c_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_c_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_c_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_c_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_c_cuda_elg_asb final :: c_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_elg_mold - procedure, pass(a) :: asb => psb_c_cuda_elg_asb -#endif end type psb_c_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_elg_get_nzeros, c_cuda_elg_free, c_cuda_elg_get_fmt, & & c_cuda_elg_get_size, c_cuda_elg_sizeof, c_cuda_elg_get_nz_row, c_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine c_cuda_elg_finalize -#else - - interface - subroutine psb_c_cuda_elg_asb(a) - import :: psb_c_cuda_elg_sparse_mat - class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_cuda_elg_asb - end interface - - interface - subroutine psb_c_cuda_elg_mold(a,b,info) - import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_elg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_elg_mold - end interface - -#endif - end module psb_c_cuda_elg_mat_mod diff --git a/cuda/psb_c_cuda_hdiag_mat_mod.F90 b/cuda/psb_c_cuda_hdiag_mat_mod.F90 index 54f47684..f06e501e 100644 --- a/cuda/psb_c_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_c_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_c_cuda_hdiag_mat_mod type, extends(psb_c_hdia_sparse_mat) :: psb_c_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_c_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_c_cuda_hdiag_to_gpu final :: c_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold -#endif end type psb_c_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_hdiag_get_nzeros, c_cuda_hdiag_free, c_cuda_hdiag_get_fmt, & & c_cuda_hdiag_get_size, c_cuda_hdiag_sizeof, c_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine c_cuda_hdiag_finalize -#else - - interface - subroutine psb_c_cuda_hdiag_mold(a,b,info) - import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_hdiag_mold - end interface - -#endif - end module psb_c_cuda_hdiag_mat_mod diff --git a/cuda/psb_c_cuda_hlg_mat_mod.F90 b/cuda/psb_c_cuda_hlg_mat_mod.F90 index 74284f30..e98f2474 100644 --- a/cuda/psb_c_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_c_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_c_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_c_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_c_cuda_hlg_to_gpu final :: c_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_hlg_mold -#endif end type psb_c_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_hlg_get_nzeros, c_cuda_hlg_free, c_cuda_hlg_get_fmt, & & c_cuda_hlg_get_size, c_cuda_hlg_sizeof, c_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine c_cuda_hlg_finalize -#else - - interface - subroutine psb_c_cuda_hlg_mold(a,b,info) - import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_hlg_mold - end interface - -#endif - end module psb_c_cuda_hlg_mat_mod diff --git a/cuda/psb_c_cuda_hybg_mat_mod.F90 b/cuda/psb_c_cuda_hybg_mat_mod.F90 index d16988ba..1c94bc0f 100644 --- a/cuda/psb_c_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_c_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(c_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_c_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_c_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_c_cuda_hybg_to_gpu final :: c_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_hybg_mold -#endif end type psb_c_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_hybg_get_nzeros, c_cuda_hybg_free, c_cuda_hybg_get_fmt, & & c_cuda_hybg_get_size, c_cuda_hybg_sizeof, c_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine c_cuda_hybg_finalize -#else - - interface - subroutine psb_c_cuda_hybg_mold(a,b,info) - import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_hybg_mold - end interface - -#endif - end module psb_c_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index be06167e..c140dadb 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -35,20 +35,17 @@ module psb_c_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_c_vect_mod - use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod + use psb_i_vect_mod use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_c_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_c_base_vect_type) :: psb_c_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr complex(c_float_complex), allocatable :: pinned_buffer(:) @@ -105,7 +102,6 @@ module psb_c_cuda_vect_mod procedure, pass(x) :: absval2 => c_cuda_absval2 final :: c_cuda_vect_finalize -#endif end type psb_c_vect_cuda public :: psb_c_vect_cuda_ @@ -126,8 +122,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine c_cuda_device_wait() call psb_cudaSync() end subroutine c_cuda_device_wait @@ -1204,8 +1198,6 @@ contains end subroutine c_cuda_ins_a -#endif - end module psb_c_cuda_vect_mod @@ -1221,20 +1213,16 @@ module psb_c_cuda_multivect_mod use psb_error_mod use psb_c_multivect_mod use psb_c_base_multivect_mod - + use psb_cuda_env_mod use psb_i_multivect_mod -#ifdef HAVE_SPGPU use psb_i_cuda_multivect_mod use psb_c_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_c_base_multivect_type) :: psb_c_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -1276,7 +1264,6 @@ module psb_c_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => c_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => c_cuda_multi_sctb_x final :: c_cuda_multi_vect_finalize -#endif end type psb_c_multivect_cuda public :: psb_c_multivect_cuda @@ -1297,7 +1284,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1981,8 +1967,6 @@ contains end subroutine c_cuda_multi_ins -#endif - end module psb_c_cuda_multivect_mod diff --git a/cuda/psb_c_vectordev_mod.F90 b/cuda/psb_c_vectordev_mod.F90 index f3c243a6..b15b2371 100644 --- a/cuda/psb_c_vectordev_mod.F90 +++ b/cuda/psb_c_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_c_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedFloatComplex(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedFloatComplex') @@ -385,6 +383,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterFloatComplex -#endif - end module psb_c_vectordev_mod diff --git a/cuda/psb_cuda_env_mod.F90 b/cuda/psb_cuda_env_mod.F90 index 0d1d4ced..4778d229 100644 --- a/cuda/psb_cuda_env_mod.F90 +++ b/cuda/psb_cuda_env_mod.F90 @@ -37,7 +37,6 @@ module psb_cuda_env_mod ! interface psb_cuda_init ! module procedure psb_cuda_init ! end interface -#if defined(HAVE_CUDA) use core_mod interface @@ -126,7 +125,6 @@ module psb_cuda_env_mod use iso_c_binding end subroutine psb_cuda_innerClose end interface -#endif interface function psb_C_DeviceHasUVA() & @@ -209,7 +207,6 @@ Contains info = psb_success_ call psb_erractionsave(err_act) -#if defined (HAVE_CUDA) #if defined(SERIAL_MPI) iam = 0 #else @@ -234,7 +231,6 @@ Contains goto 9999 end if call psb_cudaCreateHandle() -#endif call psb_erractionrestore(err_act) return 9999 call psb_error_handler(ctxt,err_act) @@ -245,18 +241,12 @@ Contains subroutine psb_cuda_DeviceSync() -#if defined(HAVE_CUDA) call psb_cudaSync() -#endif end subroutine psb_cuda_DeviceSync function psb_cuda_getDeviceCount() result(res) integer :: res -#if defined(HAVE_CUDA) res = psb_cuda_inner_getDeviceCount() -#else - res = 0 -#endif end function psb_cuda_getDeviceCount subroutine psb_cuda_exit() diff --git a/cuda/psb_d_cuda_csrg_mat_mod.F90 b/cuda/psb_d_cuda_csrg_mat_mod.F90 index 465c16a7..101959bd 100644 --- a/cuda/psb_d_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_d_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_d_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(d_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_d_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_d_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_d_cuda_csrg_from_gpu final :: d_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_csrg_mold -#endif end type psb_d_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_csrg_get_nzeros, d_cuda_csrg_free, d_cuda_csrg_get_fmt, & & d_cuda_csrg_get_size, d_cuda_csrg_sizeof, d_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine d_cuda_csrg_finalize -#else - interface - subroutine psb_d_cuda_csrg_mold(a,b,info) - import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_csrg_mold - end interface - -#endif - end module psb_d_cuda_csrg_mat_mod diff --git a/cuda/psb_d_cuda_diag_mat_mod.F90 b/cuda/psb_d_cuda_diag_mat_mod.F90 index 1d55faa0..11e4e349 100644 --- a/cuda/psb_d_cuda_diag_mat_mod.F90 +++ b/cuda/psb_d_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_d_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_d_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_d_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_d_cuda_diag_to_gpu final :: d_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_diag_mold -#endif end type psb_d_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_diag_get_nzeros, d_cuda_diag_free, d_cuda_diag_get_fmt, & & d_cuda_diag_get_size, d_cuda_diag_sizeof, d_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine d_cuda_diag_finalize -#else - - interface - subroutine psb_d_cuda_diag_mold(a,b,info) - import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_diag_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_diag_mold - end interface - -#endif - end module psb_d_cuda_diag_mat_mod diff --git a/cuda/psb_d_cuda_dnsg_mat_mod.F90 b/cuda/psb_d_cuda_dnsg_mat_mod.F90 index bb24eb1a..f11e7823 100644 --- a/cuda/psb_d_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_d_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_d_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_d_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_d_cuda_dnsg_to_gpu final :: d_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold -#endif end type psb_d_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_dnsg_get_nzeros, d_cuda_dnsg_free, d_cuda_dnsg_get_fmt, & & d_cuda_dnsg_get_size, d_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine d_cuda_dnsg_finalize -#else - - interface - subroutine psb_d_cuda_dnsg_mold(a,b,info) - import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_dnsg_mold - end interface - -#endif - end module psb_d_cuda_dnsg_mat_mod diff --git a/cuda/psb_d_cuda_elg_mat_mod.F90 b/cuda/psb_d_cuda_elg_mat_mod.F90 index 1ac47664..1af80f2a 100644 --- a/cuda/psb_d_cuda_elg_mat_mod.F90 +++ b/cuda/psb_d_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_d_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_d_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_d_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_d_cuda_elg_asb final :: d_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_elg_mold - procedure, pass(a) :: asb => psb_d_cuda_elg_asb -#endif end type psb_d_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_elg_get_nzeros, d_cuda_elg_free, d_cuda_elg_get_fmt, & & d_cuda_elg_get_size, d_cuda_elg_sizeof, d_cuda_elg_get_nz_row, d_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine d_cuda_elg_finalize -#else - - interface - subroutine psb_d_cuda_elg_asb(a) - import :: psb_d_cuda_elg_sparse_mat - class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_cuda_elg_asb - end interface - - interface - subroutine psb_d_cuda_elg_mold(a,b,info) - import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_elg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_elg_mold - end interface - -#endif - end module psb_d_cuda_elg_mat_mod diff --git a/cuda/psb_d_cuda_hdiag_mat_mod.F90 b/cuda/psb_d_cuda_hdiag_mat_mod.F90 index 17bacffe..46b63b43 100644 --- a/cuda/psb_d_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_d_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_d_cuda_hdiag_mat_mod type, extends(psb_d_hdia_sparse_mat) :: psb_d_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_d_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_d_cuda_hdiag_to_gpu final :: d_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold -#endif end type psb_d_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_hdiag_get_nzeros, d_cuda_hdiag_free, d_cuda_hdiag_get_fmt, & & d_cuda_hdiag_get_size, d_cuda_hdiag_sizeof, d_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine d_cuda_hdiag_finalize -#else - - interface - subroutine psb_d_cuda_hdiag_mold(a,b,info) - import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_hdiag_mold - end interface - -#endif - end module psb_d_cuda_hdiag_mat_mod diff --git a/cuda/psb_d_cuda_hlg_mat_mod.F90 b/cuda/psb_d_cuda_hlg_mat_mod.F90 index 19ecb62b..6627f824 100644 --- a/cuda/psb_d_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_d_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_d_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_d_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_d_cuda_hlg_to_gpu final :: d_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_hlg_mold -#endif end type psb_d_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_hlg_get_nzeros, d_cuda_hlg_free, d_cuda_hlg_get_fmt, & & d_cuda_hlg_get_size, d_cuda_hlg_sizeof, d_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine d_cuda_hlg_finalize -#else - - interface - subroutine psb_d_cuda_hlg_mold(a,b,info) - import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_hlg_mold - end interface - -#endif - end module psb_d_cuda_hlg_mat_mod diff --git a/cuda/psb_d_cuda_hybg_mat_mod.F90 b/cuda/psb_d_cuda_hybg_mat_mod.F90 index be4c8392..065c0db3 100644 --- a/cuda/psb_d_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_d_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(d_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_d_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_d_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_d_cuda_hybg_to_gpu final :: d_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_hybg_mold -#endif end type psb_d_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_hybg_get_nzeros, d_cuda_hybg_free, d_cuda_hybg_get_fmt, & & d_cuda_hybg_get_size, d_cuda_hybg_sizeof, d_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine d_cuda_hybg_finalize -#else - - interface - subroutine psb_d_cuda_hybg_mold(a,b,info) - import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_hybg_mold - end interface - -#endif - end module psb_d_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 83ec108b..44381c99 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -35,20 +35,17 @@ module psb_d_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_d_vect_mod - use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod + use psb_i_vect_mod use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_d_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_d_base_vect_type) :: psb_d_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: pinned_buffer(:) @@ -105,7 +102,6 @@ module psb_d_cuda_vect_mod procedure, pass(x) :: absval2 => d_cuda_absval2 final :: d_cuda_vect_finalize -#endif end type psb_d_vect_cuda public :: psb_d_vect_cuda_ @@ -126,8 +122,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine d_cuda_device_wait() call psb_cudaSync() end subroutine d_cuda_device_wait @@ -1204,8 +1198,6 @@ contains end subroutine d_cuda_ins_a -#endif - end module psb_d_cuda_vect_mod @@ -1221,20 +1213,16 @@ module psb_d_cuda_multivect_mod use psb_error_mod use psb_d_multivect_mod use psb_d_base_multivect_mod - + use psb_cuda_env_mod use psb_i_multivect_mod -#ifdef HAVE_SPGPU use psb_i_cuda_multivect_mod use psb_d_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_d_base_multivect_type) :: psb_d_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -1276,7 +1264,6 @@ module psb_d_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => d_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => d_cuda_multi_sctb_x final :: d_cuda_multi_vect_finalize -#endif end type psb_d_multivect_cuda public :: psb_d_multivect_cuda @@ -1297,7 +1284,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1981,8 +1967,6 @@ contains end subroutine d_cuda_multi_ins -#endif - end module psb_d_cuda_multivect_mod diff --git a/cuda/psb_d_vectordev_mod.F90 b/cuda/psb_d_vectordev_mod.F90 index cda0d9d7..802add96 100644 --- a/cuda/psb_d_vectordev_mod.F90 +++ b/cuda/psb_d_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_d_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedDouble(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedDouble') @@ -385,6 +383,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterDouble -#endif - end module psb_d_vectordev_mod diff --git a/cuda/psb_i_cuda_csrg_mat_mod.F90 b/cuda/psb_i_cuda_csrg_mat_mod.F90 index de0eac09..0867a8a5 100644 --- a/cuda/psb_i_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_i_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_i_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(i_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_i_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_i_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_i_cuda_csrg_from_gpu final :: i_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_csrg_mold -#endif end type psb_i_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_csrg_get_nzeros, i_cuda_csrg_free, i_cuda_csrg_get_fmt, & & i_cuda_csrg_get_size, i_cuda_csrg_sizeof, i_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine i_cuda_csrg_finalize -#else - interface - subroutine psb_i_cuda_csrg_mold(a,b,info) - import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_csrg_mold - end interface - -#endif - end module psb_i_cuda_csrg_mat_mod diff --git a/cuda/psb_i_cuda_diag_mat_mod.F90 b/cuda/psb_i_cuda_diag_mat_mod.F90 index 94a3cc3e..98090f06 100644 --- a/cuda/psb_i_cuda_diag_mat_mod.F90 +++ b/cuda/psb_i_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_i_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_i_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_i_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_i_cuda_diag_to_gpu final :: i_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_diag_mold -#endif end type psb_i_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_diag_get_nzeros, i_cuda_diag_free, i_cuda_diag_get_fmt, & & i_cuda_diag_get_size, i_cuda_diag_sizeof, i_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine i_cuda_diag_finalize -#else - - interface - subroutine psb_i_cuda_diag_mold(a,b,info) - import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_diag_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_diag_mold - end interface - -#endif - end module psb_i_cuda_diag_mat_mod diff --git a/cuda/psb_i_cuda_dnsg_mat_mod.F90 b/cuda/psb_i_cuda_dnsg_mat_mod.F90 index f357977e..6586d115 100644 --- a/cuda/psb_i_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_i_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_i_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_i_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_i_cuda_dnsg_to_gpu final :: i_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold -#endif end type psb_i_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_dnsg_get_nzeros, i_cuda_dnsg_free, i_cuda_dnsg_get_fmt, & & i_cuda_dnsg_get_size, i_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine i_cuda_dnsg_finalize -#else - - interface - subroutine psb_i_cuda_dnsg_mold(a,b,info) - import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_dnsg_mold - end interface - -#endif - end module psb_i_cuda_dnsg_mat_mod diff --git a/cuda/psb_i_cuda_elg_mat_mod.F90 b/cuda/psb_i_cuda_elg_mat_mod.F90 index aa3e2d4d..48311eea 100644 --- a/cuda/psb_i_cuda_elg_mat_mod.F90 +++ b/cuda/psb_i_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_i_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_i_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_i_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_i_cuda_elg_asb final :: i_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_elg_mold - procedure, pass(a) :: asb => psb_i_cuda_elg_asb -#endif end type psb_i_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_elg_get_nzeros, i_cuda_elg_free, i_cuda_elg_get_fmt, & & i_cuda_elg_get_size, i_cuda_elg_sizeof, i_cuda_elg_get_nz_row, i_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine i_cuda_elg_finalize -#else - - interface - subroutine psb_i_cuda_elg_asb(a) - import :: psb_i_cuda_elg_sparse_mat - class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_cuda_elg_asb - end interface - - interface - subroutine psb_i_cuda_elg_mold(a,b,info) - import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_elg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_elg_mold - end interface - -#endif - end module psb_i_cuda_elg_mat_mod diff --git a/cuda/psb_i_cuda_hdiag_mat_mod.F90 b/cuda/psb_i_cuda_hdiag_mat_mod.F90 index 03ff573b..22277f3a 100644 --- a/cuda/psb_i_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_i_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_i_cuda_hdiag_mat_mod type, extends(psb_i_hdia_sparse_mat) :: psb_i_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_i_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_i_cuda_hdiag_to_gpu final :: i_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold -#endif end type psb_i_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_hdiag_get_nzeros, i_cuda_hdiag_free, i_cuda_hdiag_get_fmt, & & i_cuda_hdiag_get_size, i_cuda_hdiag_sizeof, i_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine i_cuda_hdiag_finalize -#else - - interface - subroutine psb_i_cuda_hdiag_mold(a,b,info) - import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_hdiag_mold - end interface - -#endif - end module psb_i_cuda_hdiag_mat_mod diff --git a/cuda/psb_i_cuda_hlg_mat_mod.F90 b/cuda/psb_i_cuda_hlg_mat_mod.F90 index f97470d2..b4e3046e 100644 --- a/cuda/psb_i_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_i_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_i_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_i_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_i_cuda_hlg_to_gpu final :: i_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_hlg_mold -#endif end type psb_i_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_hlg_get_nzeros, i_cuda_hlg_free, i_cuda_hlg_get_fmt, & & i_cuda_hlg_get_size, i_cuda_hlg_sizeof, i_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine i_cuda_hlg_finalize -#else - - interface - subroutine psb_i_cuda_hlg_mold(a,b,info) - import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_hlg_mold - end interface - -#endif - end module psb_i_cuda_hlg_mat_mod diff --git a/cuda/psb_i_cuda_hybg_mat_mod.F90 b/cuda/psb_i_cuda_hybg_mat_mod.F90 index 10333c24..f6fc3110 100644 --- a/cuda/psb_i_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_i_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(i_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_i_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_i_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_i_cuda_hybg_to_gpu final :: i_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_hybg_mold -#endif end type psb_i_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_hybg_get_nzeros, i_cuda_hybg_free, i_cuda_hybg_get_fmt, & & i_cuda_hybg_get_size, i_cuda_hybg_sizeof, i_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine i_cuda_hybg_finalize -#else - - interface - subroutine psb_i_cuda_hybg_mold(a,b,info) - import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_hybg_mold - end interface - -#endif - end module psb_i_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 index 8d940513..df8f4113 100644 --- a/cuda/psb_i_cuda_vect_mod.F90 +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -35,17 +35,14 @@ module psb_i_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod use psb_i_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_i_base_vect_type) :: psb_i_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr integer(c_int), allocatable :: pinned_buffer(:) @@ -88,7 +85,6 @@ module psb_i_cuda_vect_mod procedure, pass(x) :: maybe_free_buffer => i_cuda_maybe_free_buffer final :: i_cuda_vect_finalize -#endif end type psb_i_vect_cuda public :: psb_i_vect_cuda_ @@ -109,8 +105,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine i_cuda_device_wait() call psb_cudaSync() end subroutine i_cuda_device_wait @@ -888,8 +882,6 @@ contains end subroutine i_cuda_ins_a -#endif - end module psb_i_cuda_vect_mod @@ -905,18 +897,14 @@ module psb_i_cuda_multivect_mod use psb_error_mod use psb_i_multivect_mod use psb_i_base_multivect_mod - -#ifdef HAVE_SPGPU + use psb_cuda_env_mod use psb_i_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_i_base_multivect_type) :: psb_i_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -958,7 +946,6 @@ module psb_i_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => i_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => i_cuda_multi_sctb_x final :: i_cuda_multi_vect_finalize -#endif end type psb_i_multivect_cuda public :: psb_i_multivect_cuda @@ -979,7 +966,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1663,8 +1649,6 @@ contains end subroutine i_cuda_multi_ins -#endif - end module psb_i_cuda_multivect_mod diff --git a/cuda/psb_i_vectordev_mod.F90 b/cuda/psb_i_vectordev_mod.F90 index 9998d355..84037aaf 100644 --- a/cuda/psb_i_vectordev_mod.F90 +++ b/cuda/psb_i_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_i_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedInt(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedInt') @@ -278,6 +276,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterInt -#endif - end module psb_i_vectordev_mod diff --git a/cuda/psb_s_cuda_csrg_mat_mod.F90 b/cuda/psb_s_cuda_csrg_mat_mod.F90 index fb13d034..bd654dbb 100644 --- a/cuda/psb_s_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_s_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_s_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(s_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_s_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_s_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_s_cuda_csrg_from_gpu final :: s_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_csrg_mold -#endif end type psb_s_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_csrg_get_nzeros, s_cuda_csrg_free, s_cuda_csrg_get_fmt, & & s_cuda_csrg_get_size, s_cuda_csrg_sizeof, s_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine s_cuda_csrg_finalize -#else - interface - subroutine psb_s_cuda_csrg_mold(a,b,info) - import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_csrg_mold - end interface - -#endif - end module psb_s_cuda_csrg_mat_mod diff --git a/cuda/psb_s_cuda_diag_mat_mod.F90 b/cuda/psb_s_cuda_diag_mat_mod.F90 index 709cd728..b13a8502 100644 --- a/cuda/psb_s_cuda_diag_mat_mod.F90 +++ b/cuda/psb_s_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_s_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_s_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_s_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_s_cuda_diag_to_gpu final :: s_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_diag_mold -#endif end type psb_s_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_diag_get_nzeros, s_cuda_diag_free, s_cuda_diag_get_fmt, & & s_cuda_diag_get_size, s_cuda_diag_sizeof, s_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine s_cuda_diag_finalize -#else - - interface - subroutine psb_s_cuda_diag_mold(a,b,info) - import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_diag_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_diag_mold - end interface - -#endif - end module psb_s_cuda_diag_mat_mod diff --git a/cuda/psb_s_cuda_dnsg_mat_mod.F90 b/cuda/psb_s_cuda_dnsg_mat_mod.F90 index b01c8365..8193655e 100644 --- a/cuda/psb_s_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_s_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_s_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_s_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_s_cuda_dnsg_to_gpu final :: s_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold -#endif end type psb_s_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_dnsg_get_nzeros, s_cuda_dnsg_free, s_cuda_dnsg_get_fmt, & & s_cuda_dnsg_get_size, s_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine s_cuda_dnsg_finalize -#else - - interface - subroutine psb_s_cuda_dnsg_mold(a,b,info) - import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_dnsg_mold - end interface - -#endif - end module psb_s_cuda_dnsg_mat_mod diff --git a/cuda/psb_s_cuda_elg_mat_mod.F90 b/cuda/psb_s_cuda_elg_mat_mod.F90 index d6b003b5..1aa9d36d 100644 --- a/cuda/psb_s_cuda_elg_mat_mod.F90 +++ b/cuda/psb_s_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_s_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_s_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_s_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_s_cuda_elg_asb final :: s_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_elg_mold - procedure, pass(a) :: asb => psb_s_cuda_elg_asb -#endif end type psb_s_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_elg_get_nzeros, s_cuda_elg_free, s_cuda_elg_get_fmt, & & s_cuda_elg_get_size, s_cuda_elg_sizeof, s_cuda_elg_get_nz_row, s_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine s_cuda_elg_finalize -#else - - interface - subroutine psb_s_cuda_elg_asb(a) - import :: psb_s_cuda_elg_sparse_mat - class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_cuda_elg_asb - end interface - - interface - subroutine psb_s_cuda_elg_mold(a,b,info) - import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_elg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_elg_mold - end interface - -#endif - end module psb_s_cuda_elg_mat_mod diff --git a/cuda/psb_s_cuda_hdiag_mat_mod.F90 b/cuda/psb_s_cuda_hdiag_mat_mod.F90 index 0a66ff09..cac72c86 100644 --- a/cuda/psb_s_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_s_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_s_cuda_hdiag_mat_mod type, extends(psb_s_hdia_sparse_mat) :: psb_s_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_s_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_s_cuda_hdiag_to_gpu final :: s_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold -#endif end type psb_s_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_hdiag_get_nzeros, s_cuda_hdiag_free, s_cuda_hdiag_get_fmt, & & s_cuda_hdiag_get_size, s_cuda_hdiag_sizeof, s_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine s_cuda_hdiag_finalize -#else - - interface - subroutine psb_s_cuda_hdiag_mold(a,b,info) - import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_hdiag_mold - end interface - -#endif - end module psb_s_cuda_hdiag_mat_mod diff --git a/cuda/psb_s_cuda_hlg_mat_mod.F90 b/cuda/psb_s_cuda_hlg_mat_mod.F90 index 81b94e5d..4f7b4b6f 100644 --- a/cuda/psb_s_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_s_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_s_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_s_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_s_cuda_hlg_to_gpu final :: s_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_hlg_mold -#endif end type psb_s_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_hlg_get_nzeros, s_cuda_hlg_free, s_cuda_hlg_get_fmt, & & s_cuda_hlg_get_size, s_cuda_hlg_sizeof, s_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine s_cuda_hlg_finalize -#else - - interface - subroutine psb_s_cuda_hlg_mold(a,b,info) - import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_hlg_mold - end interface - -#endif - end module psb_s_cuda_hlg_mat_mod diff --git a/cuda/psb_s_cuda_hybg_mat_mod.F90 b/cuda/psb_s_cuda_hybg_mat_mod.F90 index ae76aac1..f1446b15 100644 --- a/cuda/psb_s_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_s_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(s_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_s_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_s_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_s_cuda_hybg_to_gpu final :: s_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_hybg_mold -#endif end type psb_s_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_hybg_get_nzeros, s_cuda_hybg_free, s_cuda_hybg_get_fmt, & & s_cuda_hybg_get_size, s_cuda_hybg_sizeof, s_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine s_cuda_hybg_finalize -#else - - interface - subroutine psb_s_cuda_hybg_mold(a,b,info) - import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_hybg_mold - end interface - -#endif - end module psb_s_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index e19c980a..7778eb50 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -35,20 +35,17 @@ module psb_s_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_s_vect_mod - use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod + use psb_i_vect_mod use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_s_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_s_base_vect_type) :: psb_s_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr real(c_float), allocatable :: pinned_buffer(:) @@ -105,7 +102,6 @@ module psb_s_cuda_vect_mod procedure, pass(x) :: absval2 => s_cuda_absval2 final :: s_cuda_vect_finalize -#endif end type psb_s_vect_cuda public :: psb_s_vect_cuda_ @@ -126,8 +122,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine s_cuda_device_wait() call psb_cudaSync() end subroutine s_cuda_device_wait @@ -1204,8 +1198,6 @@ contains end subroutine s_cuda_ins_a -#endif - end module psb_s_cuda_vect_mod @@ -1221,20 +1213,16 @@ module psb_s_cuda_multivect_mod use psb_error_mod use psb_s_multivect_mod use psb_s_base_multivect_mod - + use psb_cuda_env_mod use psb_i_multivect_mod -#ifdef HAVE_SPGPU use psb_i_cuda_multivect_mod use psb_s_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_s_base_multivect_type) :: psb_s_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -1276,7 +1264,6 @@ module psb_s_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => s_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => s_cuda_multi_sctb_x final :: s_cuda_multi_vect_finalize -#endif end type psb_s_multivect_cuda public :: psb_s_multivect_cuda @@ -1297,7 +1284,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1981,8 +1967,6 @@ contains end subroutine s_cuda_multi_ins -#endif - end module psb_s_cuda_multivect_mod diff --git a/cuda/psb_s_vectordev_mod.F90 b/cuda/psb_s_vectordev_mod.F90 index a7319b95..3ecabe70 100644 --- a/cuda/psb_s_vectordev_mod.F90 +++ b/cuda/psb_s_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_s_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedFloat(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedFloat') @@ -385,6 +383,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterFloat -#endif - end module psb_s_vectordev_mod diff --git a/cuda/psb_z_cuda_csrg_mat_mod.F90 b/cuda/psb_z_cuda_csrg_mat_mod.F90 index 75170185..389149aa 100644 --- a/cuda/psb_z_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_z_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_z_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(z_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_z_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_z_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_z_cuda_csrg_from_gpu final :: z_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_csrg_mold -#endif end type psb_z_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_csrg_get_nzeros, z_cuda_csrg_free, z_cuda_csrg_get_fmt, & & z_cuda_csrg_get_size, z_cuda_csrg_sizeof, z_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine z_cuda_csrg_finalize -#else - interface - subroutine psb_z_cuda_csrg_mold(a,b,info) - import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_csrg_mold - end interface - -#endif - end module psb_z_cuda_csrg_mat_mod diff --git a/cuda/psb_z_cuda_diag_mat_mod.F90 b/cuda/psb_z_cuda_diag_mat_mod.F90 index 80906778..592d50b2 100644 --- a/cuda/psb_z_cuda_diag_mat_mod.F90 +++ b/cuda/psb_z_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_z_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_z_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_z_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_z_cuda_diag_to_gpu final :: z_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_diag_mold -#endif end type psb_z_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_diag_get_nzeros, z_cuda_diag_free, z_cuda_diag_get_fmt, & & z_cuda_diag_get_size, z_cuda_diag_sizeof, z_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine z_cuda_diag_finalize -#else - - interface - subroutine psb_z_cuda_diag_mold(a,b,info) - import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_diag_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_diag_mold - end interface - -#endif - end module psb_z_cuda_diag_mat_mod diff --git a/cuda/psb_z_cuda_dnsg_mat_mod.F90 b/cuda/psb_z_cuda_dnsg_mat_mod.F90 index 3fb2488b..3669d6ac 100644 --- a/cuda/psb_z_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_z_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_z_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_z_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_z_cuda_dnsg_to_gpu final :: z_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold -#endif end type psb_z_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_dnsg_get_nzeros, z_cuda_dnsg_free, z_cuda_dnsg_get_fmt, & & z_cuda_dnsg_get_size, z_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine z_cuda_dnsg_finalize -#else - - interface - subroutine psb_z_cuda_dnsg_mold(a,b,info) - import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_dnsg_mold - end interface - -#endif - end module psb_z_cuda_dnsg_mat_mod diff --git a/cuda/psb_z_cuda_elg_mat_mod.F90 b/cuda/psb_z_cuda_elg_mat_mod.F90 index 9090b0a2..bd36e4b6 100644 --- a/cuda/psb_z_cuda_elg_mat_mod.F90 +++ b/cuda/psb_z_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_z_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_z_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_z_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_z_cuda_elg_asb final :: z_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_elg_mold - procedure, pass(a) :: asb => psb_z_cuda_elg_asb -#endif end type psb_z_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_elg_get_nzeros, z_cuda_elg_free, z_cuda_elg_get_fmt, & & z_cuda_elg_get_size, z_cuda_elg_sizeof, z_cuda_elg_get_nz_row, z_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine z_cuda_elg_finalize -#else - - interface - subroutine psb_z_cuda_elg_asb(a) - import :: psb_z_cuda_elg_sparse_mat - class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_cuda_elg_asb - end interface - - interface - subroutine psb_z_cuda_elg_mold(a,b,info) - import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_elg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_elg_mold - end interface - -#endif - end module psb_z_cuda_elg_mat_mod diff --git a/cuda/psb_z_cuda_hdiag_mat_mod.F90 b/cuda/psb_z_cuda_hdiag_mat_mod.F90 index b64498f6..70897664 100644 --- a/cuda/psb_z_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_z_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_z_cuda_hdiag_mat_mod type, extends(psb_z_hdia_sparse_mat) :: psb_z_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_z_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_z_cuda_hdiag_to_gpu final :: z_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold -#endif end type psb_z_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_hdiag_get_nzeros, z_cuda_hdiag_free, z_cuda_hdiag_get_fmt, & & z_cuda_hdiag_get_size, z_cuda_hdiag_sizeof, z_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine z_cuda_hdiag_finalize -#else - - interface - subroutine psb_z_cuda_hdiag_mold(a,b,info) - import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_hdiag_mold - end interface - -#endif - end module psb_z_cuda_hdiag_mat_mod diff --git a/cuda/psb_z_cuda_hlg_mat_mod.F90 b/cuda/psb_z_cuda_hlg_mat_mod.F90 index 29ed68fa..2acf43f1 100644 --- a/cuda/psb_z_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_z_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_z_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_z_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_z_cuda_hlg_to_gpu final :: z_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_hlg_mold -#endif end type psb_z_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_hlg_get_nzeros, z_cuda_hlg_free, z_cuda_hlg_get_fmt, & & z_cuda_hlg_get_size, z_cuda_hlg_sizeof, z_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine z_cuda_hlg_finalize -#else - - interface - subroutine psb_z_cuda_hlg_mold(a,b,info) - import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_hlg_mold - end interface - -#endif - end module psb_z_cuda_hlg_mat_mod diff --git a/cuda/psb_z_cuda_hybg_mat_mod.F90 b/cuda/psb_z_cuda_hybg_mat_mod.F90 index 1bbc11b2..96620300 100644 --- a/cuda/psb_z_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_z_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(z_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_z_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_z_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_z_cuda_hybg_to_gpu final :: z_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_hybg_mold -#endif end type psb_z_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_hybg_get_nzeros, z_cuda_hybg_free, z_cuda_hybg_get_fmt, & & z_cuda_hybg_get_size, z_cuda_hybg_sizeof, z_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine z_cuda_hybg_finalize -#else - - interface - subroutine psb_z_cuda_hybg_mold(a,b,info) - import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_hybg_mold - end interface - -#endif - end module psb_z_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 35bfb4b5..53484911 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -35,20 +35,17 @@ module psb_z_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_z_vect_mod - use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod + use psb_i_vect_mod use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_z_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_z_base_vect_type) :: psb_z_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr complex(c_double_complex), allocatable :: pinned_buffer(:) @@ -105,7 +102,6 @@ module psb_z_cuda_vect_mod procedure, pass(x) :: absval2 => z_cuda_absval2 final :: z_cuda_vect_finalize -#endif end type psb_z_vect_cuda public :: psb_z_vect_cuda_ @@ -126,8 +122,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine z_cuda_device_wait() call psb_cudaSync() end subroutine z_cuda_device_wait @@ -1204,8 +1198,6 @@ contains end subroutine z_cuda_ins_a -#endif - end module psb_z_cuda_vect_mod @@ -1221,20 +1213,16 @@ module psb_z_cuda_multivect_mod use psb_error_mod use psb_z_multivect_mod use psb_z_base_multivect_mod - + use psb_cuda_env_mod use psb_i_multivect_mod -#ifdef HAVE_SPGPU use psb_i_cuda_multivect_mod use psb_z_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_z_base_multivect_type) :: psb_z_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -1276,7 +1264,6 @@ module psb_z_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => z_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => z_cuda_multi_sctb_x final :: z_cuda_multi_vect_finalize -#endif end type psb_z_multivect_cuda public :: psb_z_multivect_cuda @@ -1297,7 +1284,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1981,8 +1967,6 @@ contains end subroutine z_cuda_multi_ins -#endif - end module psb_z_cuda_multivect_mod diff --git a/cuda/psb_z_vectordev_mod.F90 b/cuda/psb_z_vectordev_mod.F90 index 58c43a43..8f07cd56 100644 --- a/cuda/psb_z_vectordev_mod.F90 +++ b/cuda/psb_z_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_z_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedDoubleComplex(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedDoubleComplex') @@ -385,6 +383,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterDoubleComplex -#endif - end module psb_z_vectordev_mod diff --git a/cuda/s_cusparse_mod.F90 b/cuda/s_cusparse_mod.F90 index 6e628fa1..ab322129 100644 --- a/cuda/s_cusparse_mod.F90 +++ b/cuda/s_cusparse_mod.F90 @@ -43,9 +43,6 @@ module s_cusparse_mod end type s_Hmat #endif - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface CSRGDeviceFree function s_CSRGDeviceFree(Mat) & & bind(c,name="s_CSRGDeviceFree") result(res) @@ -300,6 +297,4 @@ module s_cusparse_mod end interface #endif -#endif - end module s_cusparse_mod diff --git a/cuda/scusparse.c b/cuda/scusparse.c index 70a0cbd7..2ad2e2dc 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include #include "cintrf.h" @@ -92,4 +91,3 @@ #include "fcusparse_fct.h" -#endif diff --git a/cuda/svectordev.c b/cuda/svectordev.c index 9a41ae1a..bfa4061a 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "svectordev.h" @@ -300,5 +299,3 @@ int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) return(i); } -#endif - diff --git a/cuda/svectordev.h b/cuda/svectordev.h index 1fd4fd11..bf25fcb1 100644 --- a/cuda/svectordev.h +++ b/cuda/svectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include "vectordev.h" #include "cuda_runtime.h" @@ -75,4 +74,3 @@ int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA); int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, void *deviceVecB); -#endif diff --git a/cuda/vectordev.c b/cuda/vectordev.c index 2b22a8a6..65b4d533 100644 --- a/cuda/vectordev.c +++ b/cuda/vectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) #include "cuComplex.h" #include "vectordev.h" #include "cuda_runtime.h" @@ -194,5 +193,3 @@ int getMultiVecDevicePitch(void* deviceVec) return(i); } -#endif - diff --git a/cuda/vectordev.h b/cuda/vectordev.h index 9739c01b..8eca7063 100644 --- a/cuda/vectordev.h +++ b/cuda/vectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include "cuda_runtime.h" //#include "common.h" @@ -86,5 +85,3 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams * int getMultiVecDeviceSize(void* deviceVec); int getMultiVecDeviceCount(void* deviceVec); int getMultiVecDevicePitch(void* deviceVec); - -#endif diff --git a/cuda/z_cusparse_mod.F90 b/cuda/z_cusparse_mod.F90 index 020f1de5..c3f21c0c 100644 --- a/cuda/z_cusparse_mod.F90 +++ b/cuda/z_cusparse_mod.F90 @@ -43,9 +43,6 @@ module z_cusparse_mod end type z_Hmat #endif - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface CSRGDeviceFree function z_CSRGDeviceFree(Mat) & & bind(c,name="z_CSRGDeviceFree") result(res) @@ -298,7 +295,6 @@ module z_cusparse_mod integer(c_int) :: res end function z_HYBGHost2Device end interface -#endif #endif diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index 3991359a..050c0ccd 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include #include "cintrf.h" @@ -91,4 +90,3 @@ #include "fcusparse_fct.h" -#endif diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index c3671a86..0fb1c67e 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "zvectordev.h" @@ -317,5 +316,3 @@ int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVec return(i); } -#endif - diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h index ca3c966e..96330a7a 100644 --- a/cuda/zvectordev.h +++ b/cuda/zvectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include #include "cuComplex.h" @@ -87,5 +86,3 @@ int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVec int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVecA, void *deviceVecB); - -#endif From 41491f7b9c3dee27c2200e9178a10911e0f7edc2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 1 Dec 2023 15:14:50 +0100 Subject: [PATCH 021/110] Fix HAVE_CUDA in test programs --- test/cudakern/dpdegenmv.F90 | 24 ++++++++++++------------ test/cudakern/spdegenmv.F90 | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index bde57f5f..db845d71 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -547,7 +547,7 @@ program pdgenmv use psb_base_mod use psb_util_mod use psb_ext_mod -#ifdef HAVE_GPU +#ifdef HAVE_CUDA use psb_cuda_mod #endif #ifdef HAVE_RSB @@ -572,7 +572,7 @@ program pdgenmv type(psb_desc_type) :: desc_a ! dense matrices type(psb_d_vect_type), target :: xv, bv, xg, bg -#ifdef HAVE_GPU +#ifdef HAVE_CUDA type(psb_d_vect_cuda) :: vmold type(psb_i_vect_cuda) :: imold #endif @@ -594,7 +594,7 @@ program pdgenmv #ifdef HAVE_RSB type(psb_d_rsb_sparse_mat), target :: arsb #endif -#ifdef HAVE_GPU +#ifdef HAVE_CUDA type(psb_d_cuda_elg_sparse_mat), target :: aelg type(psb_d_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 @@ -618,7 +618,7 @@ program pdgenmv call psb_init(ctxt) call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call psb_cuda_init(ctxt) #endif #ifdef HAVE_RSB @@ -639,7 +639,7 @@ program pdgenmv write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif @@ -698,7 +698,7 @@ program pdgenmv stop end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA select case(psb_toupper(agfmt)) case('ELG') agmold => aelg @@ -753,7 +753,7 @@ program pdgenmv call xv%bld(x0) call psb_geasb(bv,desc_a,info,scratch=.true.) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call aux_a%cscnv(agpu,info,mold=acoo) call xg%bld(x0,mold=vmold) @@ -780,7 +780,7 @@ program pdgenmv t2 = psb_wtime() - t1 call psb_amx(ctxt,t2) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call xg%set(x0) ! FIXME: cache flush needed here @@ -870,7 +870,7 @@ program pdgenmv tflops = flops gflops = flops * ngpu write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& & tcnvc1 @@ -900,7 +900,7 @@ program pdgenmv & t2*1.d3/(1.d0*ntests) write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& & flops/1.d6 -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& & ntests, tt2 write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& @@ -925,7 +925,7 @@ program pdgenmv bdwdth = ntests*nbytes/(t2*1.d6) write(psb_out_unit,*) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU +#ifdef HAVE_CUDA bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth bdwdth = psb_cuda_MemoryPeakBandwidth() @@ -949,7 +949,7 @@ program pdgenmv call psb_errpush(info,name,a_err=ch_err) goto 9999 end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call psb_cuda_exit() #endif call psb_exit(ctxt) diff --git a/test/cudakern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90 index 9644d8c7..f953e163 100644 --- a/test/cudakern/spdegenmv.F90 +++ b/test/cudakern/spdegenmv.F90 @@ -547,7 +547,7 @@ program pdgenmv use psb_base_mod use psb_util_mod use psb_ext_mod -#ifdef HAVE_GPU +#ifdef HAVE_CUDA use psb_cuda_mod #endif use psb_s_pde3d_mod @@ -569,7 +569,7 @@ program pdgenmv type(psb_desc_type) :: desc_a ! dense matrices type(psb_s_vect_type), target :: xv,bv, xg, bg -#ifdef HAVE_GPU +#ifdef HAVE_CUDA type(psb_s_vect_cuda) :: vmold type(psb_i_vect_cuda) :: imold #endif @@ -588,7 +588,7 @@ program pdgenmv type(psb_s_hll_sparse_mat), target :: ahll type(psb_s_dia_sparse_mat), target :: adia type(psb_s_hdia_sparse_mat), target :: ahdia -#ifdef HAVE_GPU +#ifdef HAVE_CUDA type(psb_s_cuda_elg_sparse_mat), target :: aelg type(psb_s_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 @@ -612,7 +612,7 @@ program pdgenmv call psb_init(ctxt) call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call psb_cuda_init(ctxt) #endif #ifdef HAVE_RSB @@ -633,7 +633,7 @@ program pdgenmv write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif @@ -692,7 +692,7 @@ program pdgenmv stop end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA select case(psb_toupper(agfmt)) case('ELG') agmold => aelg @@ -747,7 +747,7 @@ program pdgenmv call xv%bld(x0) call psb_geasb(bv,desc_a,info,scratch=.true.) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call aux_a%cscnv(agpu,info,mold=acoo) call xg%bld(x0,mold=vmold) @@ -774,7 +774,7 @@ program pdgenmv t2 = psb_wtime() - t1 call psb_amx(ctxt,t2) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call xg%set(x0) ! FIXME: cache flush needed here @@ -864,7 +864,7 @@ program pdgenmv tflops = flops gflops = flops * ngpu write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& & tcnvc1 @@ -894,7 +894,7 @@ program pdgenmv & t2*1.d3/(1.d0*ntests) write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& & flops/1.d6 -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& & ntests, tt2 write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& @@ -919,7 +919,7 @@ program pdgenmv bdwdth = ntests*nbytes/(t2*1.d6) write(psb_out_unit,*) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU +#ifdef HAVE_CUDA bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth bdwdth = psb_cuda_MemoryPeakBandwidth() @@ -943,7 +943,7 @@ program pdgenmv call psb_errpush(info,name,a_err=ch_err) goto 9999 end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call psb_cuda_exit() #endif call psb_exit(ctxt) From 0230fbb7afb9663287e2a94aa62b61e0511221a1 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 7 Dec 2023 12:09:00 +0100 Subject: [PATCH 022/110] Identufied problems with CSRG. Will fix in a branch --- cuda/CUDA/psi_cuda_CopyCooToElg.cuh | 2 +- cuda/d_cusparse_mod.F90 | 2 + cuda/fcusparse_fct.h | 8 ++- cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 | 2 +- cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 | 2 +- cuda/impl/psb_d_cuda_csrg_to_gpu.F90 | 82 ++++++++++++++++++++++- cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 | 2 +- cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 | 2 +- 8 files changed, 93 insertions(+), 9 deletions(-) diff --git a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh index 10a81a36..98aac050 100644 --- a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh +++ b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh @@ -89,7 +89,7 @@ GEN_PSI_FUNC_NAME(TYPE_SYMBOL) (spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm, int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, int *idiag, int *rP, VALUE_TYPE *cM) -{ int i,j,k, nrws; +{ int i,j, nrws; //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); diff --git a/cuda/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 index 509253e6..ae9bcceb 100644 --- a/cuda/d_cusparse_mod.F90 +++ b/cuda/d_cusparse_mod.F90 @@ -97,6 +97,7 @@ module d_cusparse_mod end function d_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function d_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,6 +107,7 @@ module d_cusparse_mod integer(c_int) :: res end function d_CSRGDeviceCsrsmAnalysis end interface +#endif interface CSRGDeviceAlloc function d_CSRGDeviceAlloc(Mat,nr,nc,nz) & diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 5a3b1ac6..5afe410d 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -187,7 +187,7 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (void *) vY, CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); -#else +#elif CUDA_VERSION <= 12030 cusparseDnVecDescr_t vecX, vecY; size_t bfsz; vX=x->v_; @@ -212,6 +212,8 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cMat->mvbuffer)); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); +#else + fprintf(stderr,"Unsupported CUSPARSE version\n"); #endif } @@ -244,7 +246,7 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (const TYPE *) vX, (TYPE *) vY, CUSPARSE_SOLVE_POLICY_USE_LEVEL, (void *) cMat->svbuffer)); -#else +#elif CUDA_VERSION <= 12030 cusparseDnVecDescr_t vecX, vecY; size_t bfsz; vX=x->v_; @@ -285,6 +287,8 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, *(cMat->spsvDescr))); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); +#else + fprintf(stderr,"Unsupported CUSPARSE version\n"); #endif } diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 index 65b12a11..e9d42139 100644 --- a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 @@ -38,7 +38,7 @@ subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a - class(psb_c_base_sparse_mat), intent(inout) :: b + class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 index d030538e..8f8e8cbe 100644 --- a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 @@ -38,7 +38,7 @@ subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b + class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index d1949421..16cb541d 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,85 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) +!!$ if (a%is_triangle()) then +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if +!!$ if ((info == 0) )then +!!$ if ((info == 0).and.a%is_upper()) then +!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) +!!$ else +!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) +!!$ end if +!!$ end if +!!$ end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 index 29bbea6e..76871b59 100644 --- a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 @@ -38,7 +38,7 @@ subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a - class(psb_s_base_sparse_mat), intent(inout) :: b + class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info diff --git a/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 index 26490a15..e086c8a4 100644 --- a/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 @@ -38,7 +38,7 @@ subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a - class(psb_z_base_sparse_mat), intent(inout) :: b + class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info From 6b65199afba2bd6ef5d6838db53ed77daa352fed Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 11 Dec 2023 13:21:23 +0100 Subject: [PATCH 023/110] Check CUDA version for -dopt=on only from 11.7 --- configure | 5 ++++- configure.ac | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 48487f98..3a774ee1 100755 --- a/configure +++ b/configure @@ -10834,7 +10834,10 @@ fi pac_cv_cudacc="50,60,70,75"; CUDA_CC="$pac_cv_cudacc"; fi - CUDEFINES="--dopt=on"; + if (( $pac_cv_cuda_version >= 11070 )) + then + CUDEFINES="--dopt=on"; + fi for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` do CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; diff --git a/configure.ac b/configure.ac index 4b76485b..62aad4f8 100755 --- a/configure.ac +++ b/configure.ac @@ -811,7 +811,10 @@ if test "x$pac_cv_have_cuda" == "xyes"; then pac_cv_cudacc="50,60,70,75"; CUDA_CC="$pac_cv_cudacc"; fi - CUDEFINES="--dopt=on"; + if (( $pac_cv_cuda_version >= 11070 )) + then + CUDEFINES="--dopt=on"; + fi for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` do CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; From d28ea462d98d23e2a70c55f8d37bb4fd9d9fc570 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 15 Dec 2023 03:41:19 -0500 Subject: [PATCH 024/110] Modified CSRG to work with latest versions; cusparse docs are unclear --- cuda/c_cusparse_mod.F90 | 14 +++- cuda/ccusparse.c | 34 ++++---- cuda/cuda_util.c | 4 +- cuda/d_cusparse_mod.F90 | 12 ++- cuda/dcusparse.c | 6 ++ cuda/fcusparse.c | 7 +- cuda/fcusparse_fct.h | 118 +++++++++++++++++++-------- cuda/impl/psb_c_cuda_csrg_to_gpu.F90 | 63 +++++++++++++- cuda/impl/psb_d_cuda_csrg_to_gpu.F90 | 21 +---- cuda/impl/psb_s_cuda_csrg_to_gpu.F90 | 63 +++++++++++++- cuda/impl/psb_z_cuda_csrg_to_gpu.F90 | 63 +++++++++++++- cuda/s_cusparse_mod.F90 | 14 +++- cuda/scusparse.c | 14 +++- cuda/z_cusparse_mod.F90 | 14 +++- cuda/zcusparse.c | 13 ++- 15 files changed, 368 insertions(+), 92 deletions(-) diff --git a/cuda/c_cusparse_mod.F90 b/cuda/c_cusparse_mod.F90 index 59f37732..07e57a55 100644 --- a/cuda/c_cusparse_mod.F90 +++ b/cuda/c_cusparse_mod.F90 @@ -97,6 +97,7 @@ module c_cusparse_mod end function c_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function c_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,7 +107,18 @@ module c_cusparse_mod integer(c_int) :: res end function c_CSRGDeviceCsrsmAnalysis end interface - +#else + interface CSRGIsNullSvBuffer + function c_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="c_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGIsNullSvBuffer + end interface +#endif + interface CSRGDeviceAlloc function c_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="c_CSRGDeviceAlloc") result(res) diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index c5430306..bab6ede0 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -38,8 +38,9 @@ #include "cintrf.h" #include "fcusparse.h" -/* Single precision complex */ -#define TYPE float complex + +/* Double precision real */ +#define TYPE float complex #define CUSPARSE_BASE_TYPE CUDA_C_32F #define T_CSRGDeviceMat c_CSRGDeviceMat #define T_Cmat c_Cmat @@ -54,25 +55,12 @@ #define T_CSRGDeviceGetParms c_CSRGDeviceGetParms #if CUDA_SHORT_VERSION <= 10 - #define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType #define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase #define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis #define cusparseTcsrmv cusparseCcsrmv #define cusparseTcsrsv_solve cusparseCcsrsv_solve #define cusparseTcsrsv_analysis cusparseCcsrsv_analysis - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve - -#else - #define T_HYBGDeviceMat c_HYBGDeviceMat #define T_Hmat c_Hmat #define T_HYBGDeviceFree c_HYBGDeviceFree @@ -89,6 +77,22 @@ #define cusparseThybsv_solve cusparseChybsv_solve #define cusparseThybsv_analysis cusparseChybsv_analysis #define cusparseTcsr2hyb cusparseCcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr + #endif #include "fcusparse_fct.h" diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 09265410..c0e5c6e5 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -228,7 +228,7 @@ int gpuInit(int dev) if (!psb_cublas_handle) psb_cudaCreateCublasHandle(); hasUVA=getDeviceHasUVA(); - + FcusparseCreate(); return err; } @@ -240,7 +240,7 @@ void gpuClose() st1=spgpuGetStream(psb_cuda_handle); if (! psb_cublas_handle) cublasGetStream(psb_cublas_handle,&st2); - + FcusparseDestroy(); psb_cudaDestroyHandle(); if (st1 != st2) psb_cudaDestroyCublasHandle(); diff --git a/cuda/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 index ae9bcceb..399ac085 100644 --- a/cuda/d_cusparse_mod.F90 +++ b/cuda/d_cusparse_mod.F90 @@ -107,8 +107,18 @@ module d_cusparse_mod integer(c_int) :: res end function d_CSRGDeviceCsrsmAnalysis end interface +#else + interface CSRGIsNullSvBuffer + function d_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="d_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGIsNullSvBuffer + end interface #endif - + interface CSRGDeviceAlloc function d_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="d_CSRGDeviceAlloc") result(res) diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index f14e787c..657ca5be 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -86,6 +86,12 @@ #define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize #define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis #define cusparseTcsrsv2_solve cusparseDcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr #endif diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c index c1b661ab..1b37272c 100644 --- a/cuda/fcusparse.c +++ b/cuda/fcusparse.c @@ -53,14 +53,17 @@ int FcusparseCreate() if (ret == CUSPARSE_STATUS_SUCCESS) cusparse_handle = handle; } + fprintf(stderr,"Created cusparses_handle\n"); return (ret); } int FcusparseDestroy() { int val; - val = (int) cusparseDestroy(*cusparse_handle); - free(cusparse_handle); + if (cusparse_handle!=NULL){ + val = (int) cusparseDestroy(*cusparse_handle); + free(cusparse_handle); + } cusparse_handle=NULL; return(val); } diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 5afe410d..578c8b51 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -39,7 +39,7 @@ typedef struct T_CSRGDeviceMat size_t mvbsize, svbsize; void *mvbuffer, *svbuffer; #else - cusparseSpMatDescr_t descr; + cusparseSpMatDescr_t *spmvDescr; cusparseSpSVDescr_t *spsvDescr; size_t mvbsize, svbsize; void *mvbuffer, *svbuffer; @@ -102,6 +102,12 @@ int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +#else + +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat); #endif @@ -187,15 +193,20 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (void *) vY, CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); -#elif CUDA_VERSION <= 12030 +#else cusparseDnVecDescr_t vecX, vecY; size_t bfsz; + + if (T_CSRGIsNullMvDescr(cMat)) { + cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *)); + } + T_CSRGCreateSpMVDescr(cMat); vX=x->v_; vY=y->v_; CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,&beta,vecY, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, &bfsz)); if (bfsz > cMat->mvbsize) { @@ -207,13 +218,12 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,&beta,vecY, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, cMat->mvbuffer)); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); -#else - fprintf(stderr,"Unsupported CUSPARSE version\n"); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); #endif } @@ -246,16 +256,24 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (const TYPE *) vX, (TYPE *) vY, CUSPARSE_SOLVE_POLICY_USE_LEVEL, (void *) cMat->svbuffer)); -#elif CUDA_VERSION <= 12030 +#else cusparseDnVecDescr_t vecX, vecY; size_t bfsz; vX=x->v_; vY=y->v_; - cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + // fprintf(stderr,"Entry to SpSVDevice: %d %p\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr); + if (T_CSRGIsNullSvDescr(cMat)) { + cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); + cMat->svbsize=0; + CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) ); + } + //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize); CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,vecY, + &alpha,*(cMat->spmvDescr),vecX,vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr), @@ -267,31 +285,49 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, } CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); cMat->svbsize=bfsz; - } - if (cMat->spsvDescr==NULL) { CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, - cMat->descr, + *(cMat->spmvDescr), vecX, vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr), cMat->svbuffer)); } - + if (T_CSRGIsNullSvBuffer(cMat)) { + fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n"); + } + T_CSRGCreateSpMVDescr(cMat); CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,vecY, + &alpha,*(cMat->spmvDescr),vecX,vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr))); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); -#else - fprintf(stderr,"Unsupported CUSPARSE version\n"); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); #endif } + +T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) +{ + int64_t tr,tc,tz; + tr = cMat->m; + tc = cMat->n; + tz = cMat->nz; + CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr, + tr,tc,tz, + (void *) cMat->irp, + (void *) cMat->ja, + (void *) cMat->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ONE, + CUSPARSE_BASE_TYPE) ); +} + int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) { T_CSRGDeviceMat *cMat; @@ -353,17 +389,8 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) #else - int64_t rows=nr, cols=nc, nnz=nz; - CHECK_CUSPARSE(cusparseCreateCsr(&(cMat->descr), - rows, cols, nnz, - (void *) cMat->irp, - (void *) cMat->ja, - (void *) cMat->val, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ONE, - CUSPARSE_BASE_TYPE) ); + cMat->spmvDescr=NULL; cMat->spsvDescr=NULL; cMat->mvbuffer=NULL; cMat->svbuffer=NULL; @@ -389,20 +416,23 @@ int T_CSRGDeviceFree(T_Cmat *Matrix) cusparseDestroyMatDescr(cMat->descr); cusparseDestroyCsrsv2Info(cMat->triang); #else - cusparseDestroySpMat(cMat->descr); - if (cMat->spsvDescr!=NULL) { - CHECK_CUSPARSE( cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); - free(cMat->spsvDescr); - cMat->spsvDescr=NULL; + if (!T_CSRGIsNullMvDescr(cMat)) { + // already destroyed spmvDescr, just free the pointer + free(cMat->spmvDescr); + cMat->spmvDescr=NULL; } if (cMat->mvbuffer!=NULL) CHECK_CUDA( cudaFree(cMat->mvbuffer)); + cMat->mvbuffer=NULL; + cMat->mvbsize=0; + if (!T_CSRGIsNullSvDescr(cMat)) { + CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); + free(cMat->spsvDescr); + cMat->spsvDescr=NULL; + } if (cMat->svbuffer!=NULL) CHECK_CUDA( cudaFree(cMat->svbuffer)); - cMat->spsvDescr=NULL; - cMat->mvbuffer=NULL; cMat->svbuffer=NULL; - cMat->mvbsize=0; cMat->svbsize=0; #endif free(cMat); @@ -500,7 +530,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) T_CSRGDeviceMat *cMat= Matrix->mat; cusparseFillMode_t mode=type; - CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CUSPARSE_SPMAT_FILL_MODE, (const void*) &mode, sizeof(cusparseFillMode_t))); @@ -511,13 +541,27 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; cusparseDiagType_t cutype=type; - CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CUSPARSE_SPMAT_DIAG_TYPE, (const void*) &cutype, sizeof(cusparseDiagType_t))); return(0); } +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spmvDescr == NULL); +} + +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat) +{ + return(cMat->svbuffer == NULL); +} +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spsvDescr == NULL); +} + #endif int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, @@ -550,6 +594,8 @@ int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL, cMat->svbuffer)); } +#else + //cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL); #endif return(CUSPARSE_STATUS_SUCCESS); } diff --git a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index aebb07e4..8e7d25a9 100644 --- a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,66 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index 16cb541d..4ecb0bbc 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -308,6 +308,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if + #else if (a%is_unit()) then @@ -325,9 +326,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) end if end if !!! We are explicitly adding the diagonal - !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then -!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -366,24 +365,6 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) else if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) - !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) -!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) -!!$ if (a%is_triangle()) then -!!$ if (info == 0) then -!!$ if (a%is_unit()) then -!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) -!!$ else -!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) -!!$ end if -!!$ end if -!!$ if ((info == 0) )then -!!$ if ((info == 0).and.a%is_upper()) then -!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) -!!$ else -!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) -!!$ end if -!!$ end if -!!$ end if if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) endif diff --git a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index cf052e13..246e780d 100644 --- a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,66 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index f7e65627..41c96f68 100644 --- a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,66 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/s_cusparse_mod.F90 b/cuda/s_cusparse_mod.F90 index ab322129..a0214110 100644 --- a/cuda/s_cusparse_mod.F90 +++ b/cuda/s_cusparse_mod.F90 @@ -97,6 +97,7 @@ module s_cusparse_mod end function s_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function s_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="s_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,7 +107,18 @@ module s_cusparse_mod integer(c_int) :: res end function s_CSRGDeviceCsrsmAnalysis end interface - +#else + interface CSRGIsNullSvBuffer + function s_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="s_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int) :: res + end function s_CSRGIsNullSvBuffer + end interface +#endif + interface CSRGDeviceAlloc function s_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="s_CSRGDeviceAlloc") result(res) diff --git a/cuda/scusparse.c b/cuda/scusparse.c index 2ad2e2dc..d4db9b7c 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -38,8 +38,9 @@ #include "cintrf.h" #include "fcusparse.h" -/* Single precision real */ -#define TYPE float + +/* Double precision real */ +#define TYPE float #define CUSPARSE_BASE_TYPE CUDA_R_32F #define T_CSRGDeviceMat s_CSRGDeviceMat #define T_Cmat s_Cmat @@ -60,7 +61,6 @@ #define cusparseTcsrmv cusparseScsrmv #define cusparseTcsrsv_solve cusparseScsrsv_solve #define cusparseTcsrsv_analysis cusparseScsrsv_analysis - #define T_HYBGDeviceMat s_HYBGDeviceMat #define T_Hmat s_Hmat #define T_HYBGDeviceFree s_HYBGDeviceFree @@ -78,7 +78,6 @@ #define cusparseThybsv_analysis cusparseShybsv_analysis #define cusparseTcsr2hyb cusparseScsr2hyb - #elif CUDA_VERSION < 11030 #define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType @@ -87,6 +86,13 @@ #define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize #define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis #define cusparseTcsrsv2_solve cusparseScsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr + #endif #include "fcusparse_fct.h" diff --git a/cuda/z_cusparse_mod.F90 b/cuda/z_cusparse_mod.F90 index c3f21c0c..91a6fcbd 100644 --- a/cuda/z_cusparse_mod.F90 +++ b/cuda/z_cusparse_mod.F90 @@ -97,6 +97,7 @@ module z_cusparse_mod end function z_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function z_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,7 +107,18 @@ module z_cusparse_mod integer(c_int) :: res end function z_CSRGDeviceCsrsmAnalysis end interface - +#else + interface CSRGIsNullSvBuffer + function z_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="z_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGIsNullSvBuffer + end interface +#endif + interface CSRGDeviceAlloc function z_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="z_CSRGDeviceAlloc") result(res) diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index 050c0ccd..a70a6573 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -38,8 +38,9 @@ #include "cintrf.h" #include "fcusparse.h" -/* Double precision complex */ -#define TYPE double complex + +/* Double precision real */ +#define TYPE double complex #define CUSPARSE_BASE_TYPE CUDA_C_64F #define T_CSRGDeviceMat z_CSRGDeviceMat #define T_Cmat z_Cmat @@ -85,8 +86,14 @@ #define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize #define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis #define cusparseTcsrsv2_solve cusparseZcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr + #endif #include "fcusparse_fct.h" - From 62db7c0449f2046fc9a7cd890f123a8a71ef7336 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 15 Dec 2023 04:11:01 -0500 Subject: [PATCH 025/110] Fix spsv with CSRG handling of descriptors. --- cuda/fcusparse_fct.h | 59 +++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 578c8b51..85473c99 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -263,42 +263,45 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, vY=y->v_; CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + if (T_CSRGIsNullMvDescr(cMat)) { + cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *)); + } + T_CSRGCreateSpMVDescr(cMat); // fprintf(stderr,"Entry to SpSVDevice: %d %p\n", // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr); if (T_CSRGIsNullSvDescr(cMat)) { cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); cMat->svbsize=0; CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) ); - } - //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n", - // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize); - CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,*(cMat->spmvDescr),vecX,vecY, - CUSPARSE_BASE_TYPE, - CUSPARSE_SPSV_ALG_DEFAULT, - *(cMat->spsvDescr), - &bfsz)); - if (bfsz > cMat->svbsize) { - if (cMat->svbuffer != NULL) { - CHECK_CUDA(cudaFree(cMat->svbuffer)); - cMat->svbuffer = NULL; + //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize); + CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,*(cMat->spmvDescr),vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + &bfsz)); + if (bfsz > cMat->svbsize) { + if (cMat->svbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->svbuffer)); + cMat->svbuffer = NULL; + } + CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + cMat->svbsize=bfsz; + CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + *(cMat->spmvDescr), + vecX, vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + cMat->svbuffer)); + } + if (T_CSRGIsNullSvBuffer(cMat)) { + fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n"); } - CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); - cMat->svbsize=bfsz; - CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, - *(cMat->spmvDescr), - vecX, vecY, - CUSPARSE_BASE_TYPE, - CUSPARSE_SPSV_ALG_DEFAULT, - *(cMat->spsvDescr), - cMat->svbuffer)); - } - if (T_CSRGIsNullSvBuffer(cMat)) { - fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n"); } - T_CSRGCreateSpMVDescr(cMat); CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha,*(cMat->spmvDescr),vecX,vecY, CUSPARSE_BASE_TYPE, From 1bc2a884e21c2c5a8d96fd5192181fb07a38a518 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 15 Dec 2023 10:52:48 +0100 Subject: [PATCH 026/110] Adjust conditional compilation on CUDA version --- cuda/fcusparse_fct.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 85473c99..52df7b78 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -313,7 +313,7 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, #endif } - +#if CUDA_VERSION >= 11030 T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) { int64_t tr,tc,tz; @@ -330,7 +330,7 @@ T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) CUSPARSE_INDEX_BASE_ONE, CUSPARSE_BASE_TYPE) ); } - +#endif int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) { T_CSRGDeviceMat *cMat; From 20a01d4d71df474faea3e5fed5b518c1de8bef9e Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 19 Dec 2023 11:54:42 -0500 Subject: [PATCH 027/110] Attempt at fixing CSRG in CUDA 10.2. Not complete yet. --- cuda/fcusparse_fct.h | 13 +++++++------ cuda/impl/psb_c_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- cuda/impl/psb_d_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- cuda/impl/psb_s_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- cuda/impl/psb_z_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- 5 files changed, 47 insertions(+), 46 deletions(-) diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 52df7b78..06facdc0 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -141,15 +141,16 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cusparseHandle_t *my_handle=getHandle(); TYPE ealpha=alpha, ebeta=beta; #if CUDA_SHORT_VERSION <= 10 - /*getAddrMultiVecDevice(deviceX, &vX); - getAddrMultiVecDevice(deviceY, &vY); */ + /* getAddrMultiVecDevice(deviceX, &vX); */ + /* getAddrMultiVecDevice(deviceY, &vY); */ vX=x->v_; vY=y->v_; - return cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, - cMat->val, cMat->irp, cMat->ja, - (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY); + CHECK_CUSPARSE(cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY)); + #elif CUDA_VERSION < 11030 size_t bfsz; vX=x->v_; diff --git a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index 8e7d25a9..cc3fbaaf 100644 --- a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index 4ecb0bbc..d7a1b1e7 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else diff --git a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index 246e780d..cc5b9c8d 100644 --- a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index 41c96f68..56943f37 100644 --- a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else From e9d1238b43c3f1c6b9689920701ead8d65ce8d0f Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 20 Dec 2023 13:30:09 +0100 Subject: [PATCH 028/110] Add detailed measurements. --- base/psblas/psb_dspmm.f90 | 39 ++++++++++++++++++++++++++++--------- test/kernel/pdgenspmv.f90 | 27 +++++++++++++++---------- test/pargen/psb_d_pde3d.F90 | 4 ++-- test/pargen/runs/ppde.inp | 2 +- 4 files changed, 50 insertions(+), 22 deletions(-) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index 7888188a..780b4d24 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_dspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -184,18 +200,22 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& logical, parameter :: do_timings=.true. real(psb_dpk_) :: t1, t2, t3, t4, t5 if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() + if (do_timings) call psb_tic(mv_phase1) if (doswap_) call psi_swapdata(psb_swap_send_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t2= psb_wtime() + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) t3= psb_wtime() + if (do_timings) call psb_toc(mv_phase2) + if (do_timings) call psb_tic(mv_phase3) if (doswap_) call psi_swapdata(psb_swap_recv_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) if (do_timings) t4= psb_wtime() call a%and%spmm(alpha,x%v,done,y%v,info) - if (do_timings) t5= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + if (do_timings) call psb_toc(mv_phase4) + end block else @@ -203,15 +223,16 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& logical, parameter :: do_timings=.true. real(psb_dpk_) :: t1, t2, t3, t4, t5 if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() + + if (do_timings) call psb_tic(mv_phase11) if (doswap_) then call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) end if - if (do_timings) t2= psb_wtime() + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) call psb_csmm(alpha,a,x,beta,y,info) - if (do_timings) t3= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2 + if (do_timings) call psb_toc(mv_phase12) end block end if diff --git a/test/kernel/pdgenspmv.f90 b/test/kernel/pdgenspmv.f90 index d5fd9ba4..b7204edd 100644 --- a/test/kernel/pdgenspmv.f90 +++ b/test/kernel/pdgenspmv.f90 @@ -142,7 +142,7 @@ contains ! the rhs. ! subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& - & f,amold,vmold,imold,partition,nrl,iv) + & f,amold,vmold,imold,partition,nrl,iv,tnd) use psb_base_mod use psb_util_mod ! @@ -173,7 +173,7 @@ contains class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold integer(psb_ipk_), optional :: partition, nrl,iv(:) - + logical, optional :: tnd ! Local variables. integer(psb_ipk_), parameter :: nb=20 @@ -202,6 +202,7 @@ contains real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb integer(psb_ipk_) :: err_act procedure(d_func_3d), pointer :: f_ + logical :: tnd_ character(len=20) :: name, ch_err,tmpfmt info = psb_success_ @@ -495,9 +496,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) else - call psb_spasb(a,desc_a,info,afmt=afmt) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) end if end if call psb_barrier(ctxt) @@ -549,13 +550,14 @@ program pdgenspmv use psb_base_mod use psb_util_mod use psb_d_pde3d_mod + implicit none ! input parameters character(len=20) :: kmethd, ptype character(len=5) :: afmt integer(psb_ipk_) :: idim - + logical :: tnd ! miscellaneous real(psb_dpk_), parameter :: one = done real(psb_dpk_) :: t1, t2, tprec, flops, tflops, tt1, tt2, bdwdth @@ -606,14 +608,14 @@ program pdgenspmv ! ! get parameters ! - call get_parms(ctxt,afmt,idim) - + call get_parms(ctxt,afmt,idim,tnd) + call psb_init_timers() ! ! allocate and fill in the coefficient matrix, rhs and initial guess ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info) + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,tnd=tnd) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -694,7 +696,7 @@ program pdgenspmv write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize end if - + call psb_print_timers(ctxt) ! ! cleanup storage and exit @@ -721,10 +723,11 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,afmt,idim) + subroutine get_parms(ctxt,afmt,idim,tnd) type(psb_ctxt_type) :: ctxt character(len=*) :: afmt integer(psb_ipk_) :: idim + logical :: tnd integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: intbuf(10), ip @@ -733,9 +736,11 @@ contains if (iam == 0) then read(psb_inp_unit,*) afmt read(psb_inp_unit,*) idim + read(psb_inp_unit,*) tnd endif call psb_bcast(ctxt,afmt) call psb_bcast(ctxt,idim) + call psb_bcast(ctxt,tnd) if (iam == 0) then write(psb_out_unit,'("Testing matrix : ell1")') @@ -743,6 +748,8 @@ contains write(psb_out_unit,'("Number of processors : ",i0)')np write(psb_out_unit,'("Data distribution : BLOCK")') write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Storage format ",a)') afmt + write(psb_out_unit,'("Testing overlap ND ",l8)') tnd end if return diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index 62bb8b40..4748569c 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -868,8 +868,8 @@ program psb_d_pde3d call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - - call psb_exit(ctxt) + call psb_print_timers(ctxt) + call psb_exit(ctxt) stop 9999 call psb_error(ctxt) diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index cf7179ac..470bcf58 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -5,7 +5,7 @@ CSR Storage format for matrix A: CSR COO 200 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 -0008 MAXIT +0200 MAXIT 10 ITRACE 002 IRST restart for RGMRES and BiCGSTABL INVK Block Solver ILU,ILUT,INVK,AINVT,AORTH From be7571f56868362159e9ad6ea459434009fae794 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 20 Dec 2023 13:45:43 +0100 Subject: [PATCH 029/110] Fix missing directive --- base/serial/impl/psb_c_csr_impl.F90 | 1 + base/serial/impl/psb_d_csr_impl.F90 | 1 + base/serial/impl/psb_s_csr_impl.F90 | 1 + base/serial/impl/psb_z_csr_impl.F90 | 1 + 4 files changed, 4 insertions(+) diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index f6426f49..6c21f639 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -4310,6 +4310,7 @@ contains end subroutine csr_spspmm end subroutine psb_ccsrspspmm +#endif subroutine psb_c_ecsr_mold(a,b,info) use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index 40d97bc0..9f1d509c 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -4310,6 +4310,7 @@ contains end subroutine csr_spspmm end subroutine psb_dcsrspspmm +#endif subroutine psb_d_ecsr_mold(a,b,info) use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_mold diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index abce0086..a4e1ab82 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -4310,6 +4310,7 @@ contains end subroutine csr_spspmm end subroutine psb_scsrspspmm +#endif subroutine psb_s_ecsr_mold(a,b,info) use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_mold diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index b550e8f1..28ac121e 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -4310,6 +4310,7 @@ contains end subroutine csr_spspmm end subroutine psb_zcsrspspmm +#endif subroutine psb_z_ecsr_mold(a,b,info) use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_mold From 49e99a3e82a49d78726bc0b82eda0e07b5eb80db Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 22 Dec 2023 12:01:41 +0100 Subject: [PATCH 030/110] Fix conversion and product to enable overlap with GPU --- base/psblas/psb_dspmm.f90 | 1 + base/serial/impl/psb_d_mat_impl.F90 | 313 ++++++++++++++++++++-------- 2 files changed, 231 insertions(+), 83 deletions(-) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index 780b4d24..8e48c4c2 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -199,6 +199,7 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& block logical, parameter :: do_timings=.true. real(psb_dpk_) :: t1, t2, t3, t4, t5 + !write(0,*) 'Going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() if (do_timings) call psb_barrier(ctxt) if (do_timings) call psb_tic(mv_phase1) if (doswap_) call psi_swapdata(psb_swap_send_,& diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index 2a6fb9a5..caf725d1 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -1246,54 +1246,66 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then - - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_d_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_d_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_d_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) +!!$ if (present(mold)) then +!!$ +!!$ allocate(altmp, mold=mold,stat=info) +!!$ +!!$ else if (present(type)) then +!!$ +!!$ select case (psb_toupper(type)) +!!$ case ('CSR') +!!$ allocate(psb_d_csr_sparse_mat :: altmp, stat=info) +!!$ case ('COO') +!!$ allocate(psb_d_coo_sparse_mat :: altmp, stat=info) +!!$ case ('CSC') +!!$ allocate(psb_d_csc_sparse_mat :: altmp, stat=info) +!!$ case default +!!$ info = psb_err_format_unknown_ +!!$ call psb_errpush(info,name,a_err=type) +!!$ goto 9999 +!!$ end select +!!$ else +!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) +!!$ end if +!!$ +!!$ if (info /= psb_success_) then +!!$ info = psb_err_alloc_dealloc_ +!!$ call psb_errpush(info,name) +!!$ goto 9999 +!!$ end if +!!$ +!!$ +!!$ if (present(dupl)) then +!!$ call altmp%set_dupl(dupl) +!!$ else if (a%is_bld()) then +!!$ ! Does this make sense at all?? Who knows.. +!!$ call altmp%set_dupl(psb_dupl_def_) +!!$ end if +!!$ +!!$ if (debug) write(psb_err_unit,*) 'Converting from ',& +!!$ & a%get_fmt(),' to ',altmp%get_fmt() +!!$ +!!$ call altmp%cp_from_fmt(a%a, info) +!!$ +!!$ if (info /= psb_success_) then +!!$ info = psb_err_from_subroutine_ +!!$ call psb_errpush(info,name,a_err="mv_from") +!!$ goto 9999 +!!$ end if +!!$ +!!$ call move_alloc(altmp,b%a) + call inner_cp_alloc(a%a,b%a,info,type,mold) + if (info /= 0) goto 9999 + if (allocated(a%ad)) then + call inner_cp_alloc(a%ad,b%ad,info,type,mold) + if (info /= 0) goto 9999 end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) + if (allocated(a%and)) then + call inner_cp_alloc(a%and,b%and,info,type,mold) + if (info /= 0) goto 9999 end if - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%cp_from_fmt(a%a, info) - - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 - end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,6 +1315,69 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_alloc(a,b,info,type,mold) + class(psb_d_base_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + character(len=*), optional, intent(in) :: type + class(psb_d_base_sparse_mat), intent(in), optional :: mold + + class(psb_d_base_sparse_mat), allocatable :: altmp + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else +!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_alloc end subroutine psb_d_cscnv @@ -1345,46 +1420,57 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then - - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_d_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_d_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_d_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) +!!$ if (present(mold)) then +!!$ +!!$ allocate(altmp, mold=mold,stat=info) +!!$ +!!$ else if (present(type)) then +!!$ +!!$ select case (psb_toupper(type)) +!!$ case ('CSR') +!!$ allocate(psb_d_csr_sparse_mat :: altmp, stat=info) +!!$ case ('COO') +!!$ allocate(psb_d_coo_sparse_mat :: altmp, stat=info) +!!$ case ('CSC') +!!$ allocate(psb_d_csc_sparse_mat :: altmp, stat=info) +!!$ case default +!!$ info = psb_err_format_unknown_ +!!$ call psb_errpush(info,name,a_err=type) +!!$ goto 9999 +!!$ end select +!!$ else +!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) +!!$ end if +!!$ +!!$ if (info /= psb_success_) then +!!$ info = psb_err_alloc_dealloc_ +!!$ call psb_errpush(info,name) +!!$ goto 9999 +!!$ end if +!!$ +!!$ if (debug) write(psb_err_unit,*) 'Converting in-place from ',& +!!$ & a%get_fmt(),' to ',altmp%get_fmt() +!!$ +!!$ call altmp%mv_from_fmt(a%a, info) +!!$ +!!$ if (info /= psb_success_) then +!!$ info = psb_err_from_subroutine_ +!!$ call psb_errpush(info,name,a_err="mv_from") +!!$ goto 9999 +!!$ end if +!!$ +!!$ call move_alloc(altmp,a%a) + + call inner_mv_alloc(a%a,info,type,mold) + if (info /= 0) goto 9999 + if (allocated(a%ad)) then + call inner_mv_alloc(a%ad,info,type,mold) + if (info /= 0) goto 9999 end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + if (allocated(a%and)) then + call inner_mv_alloc(a%and,info,type,mold) + if (info /= 0) goto 9999 end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 - end if - - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,7 +1480,68 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return - +contains + subroutine inner_mv_alloc(a,info,type,mold) + class(psb_d_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + character(len=*), optional, intent(in) :: type + class(psb_d_base_sparse_mat), intent(in), optional :: mold + + class(psb_d_base_sparse_mat), allocatable :: altmp + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else +!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_alloc end subroutine psb_d_cscnv_ip From 4d051c777d52eea024c1e6da36483b98e650ba77 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 22 Dec 2023 12:01:59 +0100 Subject: [PATCH 031/110] Fix makefile and test program --- test/cudakern/Makefile | 5 ++++- test/cudakern/dpdegenmv.F90 | 38 +++++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile index fdd4f588..5d938973 100755 --- a/test/cudakern/Makefile +++ b/test/cudakern/Makefile @@ -24,9 +24,12 @@ DPGOBJS=dpdegenmv.o SPGOBJS=spdegenmv.o EXEDIR=./runs -all: pgen file +all: dir pgen file pgen: dpdegenmv spdegenmv file: s_file_spmv c_file_spmv d_file_spmv z_file_spmv +dpdegenmv spdegenmv s_file_spmv c_file_spmv d_file_spmv z_file_spmv: dir +dir: + (if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi) dpdegenmv: $(DPGOBJS) $(FLINK) $(LOPT) $(DPGOBJS) -fopenmp -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index db845d71..85059e81 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -70,6 +70,16 @@ contains ! ! functions parametrizing the differential equation ! + + ! + ! Note: b1, b2 and b3 are the coefficients of the first + ! derivative of the unknown function. The default + ! we apply here is to have them zero, so that the resulting + ! matrix is symmetric/hermitian and suitable for + ! testing with CG and FCG. + ! When testing methods for non-hermitian matrices you can + ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) + ! function b1(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero implicit none @@ -138,7 +148,7 @@ contains ! the rhs. ! subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& - & f,amold,vmold,imold,partition,nrl,iv) + & f,amold,vmold,imold,partition,nrl,iv,tnd) use psb_base_mod use psb_util_mod ! @@ -169,7 +179,7 @@ contains class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold integer(psb_ipk_), optional :: partition, nrl,iv(:) - + logical, optional :: tnd ! Local variables. integer(psb_ipk_), parameter :: nb=20 @@ -198,6 +208,7 @@ contains real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb integer(psb_ipk_) :: err_act procedure(d_func_3d), pointer :: f_ + logical :: tnd_ character(len=20) :: name, ch_err,tmpfmt info = psb_success_ @@ -492,9 +503,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) else - call psb_spasb(a,desc_a,info,afmt=afmt) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) end if end if call psb_barrier(ctxt) @@ -559,7 +570,7 @@ program pdgenmv ! input parameters character(len=5) :: acfmt, agfmt integer :: idim - + logical :: tnd ! miscellaneous real(psb_dpk_), parameter :: one = 1.d0 real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& @@ -646,14 +657,14 @@ program pdgenmv ! ! get parameters ! - call get_parms(ctxt,acfmt,agfmt,idim) - + call get_parms(ctxt,acfmt,agfmt,idim,tnd) + call psb_init_timers() ! ! allocate and fill in the coefficient matrix and initial vectors ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3) + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3,tnd=tnd) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -935,6 +946,7 @@ program pdgenmv write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize end if + call psb_print_timers(ctxt) ! ! cleanup storage and exit @@ -962,10 +974,11 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,acfmt,agfmt,idim) + subroutine get_parms(ctxt,acfmt,agfmt,idim,tnd) type(psb_ctxt_type) :: ctxt character(len=*) :: agfmt, acfmt integer :: idim + logical :: tnd integer :: np, iam integer :: intbuf(10), ip @@ -978,17 +991,22 @@ contains read(psb_inp_unit,*) agfmt write(*,*) 'Size of discretization cube?' read(psb_inp_unit,*) idim + write(*,*) 'Try comm/comp overlap?' + read(psb_inp_unit,*) tnd endif call psb_bcast(ctxt,acfmt) call psb_bcast(ctxt,agfmt) call psb_bcast(ctxt,idim) - + call psb_bcast(ctxt,tnd) + if (iam == 0) then write(psb_out_unit,'("Testing matrix : ell1")') write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim write(psb_out_unit,'("Number of processors : ",i0)')np write(psb_out_unit,'("Data distribution : BLOCK")') write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Storage formats ",a)') acfmt,' ',agfmt + write(psb_out_unit,'("Testing overlap ND ",l8)') tnd end if return From 3aa3c795e98fc2a8a4dc9f8ee3aa36e74f13fef8 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Sat, 23 Dec 2023 13:15:01 +0100 Subject: [PATCH 032/110] Refactor assembly and cnv --- base/modules/serial/psb_c_mat_mod.F90 | 15 +- base/modules/serial/psb_d_mat_mod.F90 | 15 +- base/modules/serial/psb_s_mat_mod.F90 | 15 +- base/modules/serial/psb_z_mat_mod.F90 | 15 +- base/psblas/psb_cspmm.f90 | 74 ++++-- base/psblas/psb_dspmm.f90 | 71 +++-- base/psblas/psb_sspmm.f90 | 74 ++++-- base/psblas/psb_zspmm.f90 | 74 ++++-- base/serial/impl/psb_c_mat_impl.F90 | 356 ++++++++++++++++++++----- base/serial/impl/psb_d_mat_impl.F90 | 369 +++++++++++++++----------- base/serial/impl/psb_s_mat_impl.F90 | 356 ++++++++++++++++++++----- base/serial/impl/psb_z_mat_impl.F90 | 356 ++++++++++++++++++++----- base/tools/psb_cspasb.f90 | 73 ++--- base/tools/psb_dspasb.f90 | 73 ++--- base/tools/psb_sspasb.f90 | 73 ++--- base/tools/psb_zspasb.f90 | 73 ++--- 16 files changed, 1457 insertions(+), 625 deletions(-) diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index aa891381..ee819535 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -204,6 +204,7 @@ module psb_c_mat_mod procedure, pass(a) :: cscnv_ip => psb_c_cscnv_ip procedure, pass(a) :: cscnv_base => psb_c_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_c_split_nd procedure, pass(a) :: clone => psb_cspmat_clone procedure, pass(a) :: move_alloc => psb_cspmat_type_move ! @@ -842,6 +843,18 @@ module psb_c_mat_mod ! ! + interface + subroutine psb_c_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + end subroutine psb_c_split_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -861,7 +874,6 @@ module psb_c_mat_mod end subroutine psb_c_cscnv end interface - interface subroutine psb_c_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat @@ -873,7 +885,6 @@ module psb_c_mat_mod end subroutine psb_c_cscnv_ip end interface - interface subroutine psb_c_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index c647e76b..82d2e822 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -204,6 +204,7 @@ module psb_d_mat_mod procedure, pass(a) :: cscnv_ip => psb_d_cscnv_ip procedure, pass(a) :: cscnv_base => psb_d_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_d_split_nd procedure, pass(a) :: clone => psb_dspmat_clone procedure, pass(a) :: move_alloc => psb_dspmat_type_move ! @@ -842,6 +843,18 @@ module psb_d_mat_mod ! ! + interface + subroutine psb_d_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + end subroutine psb_d_split_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -861,7 +874,6 @@ module psb_d_mat_mod end subroutine psb_d_cscnv end interface - interface subroutine psb_d_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat @@ -873,7 +885,6 @@ module psb_d_mat_mod end subroutine psb_d_cscnv_ip end interface - interface subroutine psb_d_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index 3e6b286a..d8a2e6ae 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -204,6 +204,7 @@ module psb_s_mat_mod procedure, pass(a) :: cscnv_ip => psb_s_cscnv_ip procedure, pass(a) :: cscnv_base => psb_s_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_s_split_nd procedure, pass(a) :: clone => psb_sspmat_clone procedure, pass(a) :: move_alloc => psb_sspmat_type_move ! @@ -842,6 +843,18 @@ module psb_s_mat_mod ! ! + interface + subroutine psb_s_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + end subroutine psb_s_split_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -861,7 +874,6 @@ module psb_s_mat_mod end subroutine psb_s_cscnv end interface - interface subroutine psb_s_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat @@ -873,7 +885,6 @@ module psb_s_mat_mod end subroutine psb_s_cscnv_ip end interface - interface subroutine psb_s_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index 148e9ab9..694d4efc 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -204,6 +204,7 @@ module psb_z_mat_mod procedure, pass(a) :: cscnv_ip => psb_z_cscnv_ip procedure, pass(a) :: cscnv_base => psb_z_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_z_split_nd procedure, pass(a) :: clone => psb_zspmat_clone procedure, pass(a) :: move_alloc => psb_zspmat_type_move ! @@ -842,6 +843,18 @@ module psb_z_mat_mod ! ! + interface + subroutine psb_z_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + end subroutine psb_z_split_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -861,7 +874,6 @@ module psb_z_mat_mod end subroutine psb_z_cscnv end interface - interface subroutine psb_z_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat @@ -873,7 +885,6 @@ module psb_z_mat_mod end subroutine psb_z_cscnv_ip end interface - interface subroutine psb_z_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index 25a6bc56..22c6408f 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_cspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,34 +194,44 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - + if (allocated(a%ad)) then block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_send_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t2= psb_wtime() - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) t3= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t4= psb_wtime() - call a%and%spmm(alpha,x%v,cone,y%v,info) - if (do_timings) t5= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 - end block - - else - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,cone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if if(info /= psb_success_) then diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index 8e48c4c2..fa256276 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -194,48 +194,45 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - + if (allocated(a%ad)) then block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - !write(0,*) 'Going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() - if (do_timings) call psb_barrier(ctxt) - if (do_timings) call psb_tic(mv_phase1) - if (doswap_) call psi_swapdata(psb_swap_send_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) call psb_toc(mv_phase1) - if (do_timings) call psb_tic(mv_phase2) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) call psb_toc(mv_phase2) - if (do_timings) call psb_tic(mv_phase3) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) call psb_toc(mv_phase3) - if (do_timings) call psb_tic(mv_phase4) - if (do_timings) t4= psb_wtime() - call a%and%spmm(alpha,x%v,done,y%v,info) - if (do_timings) call psb_toc(mv_phase4) - - end block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,done,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block else block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - if (do_timings) call psb_barrier(ctxt) - - if (do_timings) call psb_tic(mv_phase11) - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - if (do_timings) call psb_toc(mv_phase11) - if (do_timings) call psb_tic(mv_phase12) - call psb_csmm(alpha,a,x,beta,y,info) - if (do_timings) call psb_toc(mv_phase12) - end block - end if + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block + end if if(info /= psb_success_) then info = psb_err_from_subroutine_non_ diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index cf8919f0..6c723831 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_sspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,34 +194,44 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - + if (allocated(a%ad)) then block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_send_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t2= psb_wtime() - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) t3= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t4= psb_wtime() - call a%and%spmm(alpha,x%v,sone,y%v,info) - if (do_timings) t5= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 - end block - - else - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,sone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if if(info /= psb_success_) then diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index 629fcf2b..179e4fad 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_zspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,34 +194,44 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - + if (allocated(a%ad)) then block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_send_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t2= psb_wtime() - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) t3= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t4= psb_wtime() - call a%and%spmm(alpha,x%v,zone,y%v,info) - if (do_timings) t5= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 - end block - - else - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,zone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if if(info /= psb_success_) then diff --git a/base/serial/impl/psb_c_mat_impl.F90 b/base/serial/impl/psb_c_mat_impl.F90 index df5c4cd9..bbac0406 100644 --- a/base/serial/impl/psb_c_mat_impl.F90 +++ b/base/serial/impl/psb_c_mat_impl.F90 @@ -1213,6 +1213,56 @@ subroutine psb_c_b_csclip(a,b,info,& end subroutine psb_c_b_csclip +subroutine psb_c_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_c_mat_mod, psb_protect_name => psb_c_split_nd + implicit none + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + type(psb_c_coo_sparse_mat) :: acoo + type(psb_c_csr_sparse_mat), allocatable :: aclip + type(psb_c_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_split_nd + subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1296,65 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_c_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_c_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_c_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1364,79 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_c_base_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_c_base_sparse_mat), intent(in), optional :: mold + + class(psb_c_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_c_cscnv subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1445,12 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) use psb_c_mat_mod, psb_protect_name => psb_c_cscnv_ip implicit none - class(psb_cspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_c_base_sparse_mat), intent(in), optional :: mold - class(psb_c_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1477,55 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_c_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_c_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_c_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1535,77 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_c_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_c_base_sparse_mat), intent(in), optional :: mold + class(psb_c_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_c_cscnv_ip diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index caf725d1..9af64b3f 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -1213,6 +1213,56 @@ subroutine psb_d_b_csclip(a,b,info,& end subroutine psb_d_b_csclip +subroutine psb_d_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_d_mat_mod, psb_protect_name => psb_d_split_nd + implicit none + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat), allocatable :: aclip + type(psb_d_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_split_nd + subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,65 +1296,64 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if -!!$ if (present(mold)) then -!!$ -!!$ allocate(altmp, mold=mold,stat=info) -!!$ -!!$ else if (present(type)) then -!!$ -!!$ select case (psb_toupper(type)) -!!$ case ('CSR') -!!$ allocate(psb_d_csr_sparse_mat :: altmp, stat=info) -!!$ case ('COO') -!!$ allocate(psb_d_coo_sparse_mat :: altmp, stat=info) -!!$ case ('CSC') -!!$ allocate(psb_d_csc_sparse_mat :: altmp, stat=info) -!!$ case default -!!$ info = psb_err_format_unknown_ -!!$ call psb_errpush(info,name,a_err=type) -!!$ goto 9999 -!!$ end select -!!$ else -!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) -!!$ end if -!!$ -!!$ if (info /= psb_success_) then -!!$ info = psb_err_alloc_dealloc_ -!!$ call psb_errpush(info,name) -!!$ goto 9999 -!!$ end if -!!$ -!!$ -!!$ if (present(dupl)) then -!!$ call altmp%set_dupl(dupl) -!!$ else if (a%is_bld()) then -!!$ ! Does this make sense at all?? Who knows.. -!!$ call altmp%set_dupl(psb_dupl_def_) -!!$ end if -!!$ -!!$ if (debug) write(psb_err_unit,*) 'Converting from ',& -!!$ & a%get_fmt(),' to ',altmp%get_fmt() -!!$ -!!$ call altmp%cp_from_fmt(a%a, info) -!!$ -!!$ if (info /= psb_success_) then -!!$ info = psb_err_from_subroutine_ -!!$ call psb_errpush(info,name,a_err="mv_from") -!!$ goto 9999 -!!$ end if -!!$ -!!$ call move_alloc(altmp,b%a) - call inner_cp_alloc(a%a,b%a,info,type,mold) - if (info /= 0) goto 9999 - if (allocated(a%ad)) then - call inner_cp_alloc(a%ad,b%ad,info,type,mold) - if (info /= 0) goto 9999 - end if - if (allocated(a%and)) then - call inner_cp_alloc(a%and,b%and,info,type,mold) - if (info /= 0) goto 9999 - end if + if (.false.) then + if (present(mold)) then + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a%a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if + end if call b%trim() call b%set_asb() @@ -1316,24 +1365,26 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) return contains - subroutine inner_cp_alloc(a,b,info,type,mold) + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) class(psb_d_base_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl character(len=*), optional, intent(in) :: type class(psb_d_base_sparse_mat), intent(in), optional :: mold - + class(psb_d_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act info = psb_success_ call psb_erractionsave(err_act) - + if (present(mold)) then - + allocate(altmp, mold=mold,stat=info) - + else if (present(type)) then - + select case (psb_toupper(type)) case ('CSR') allocate(psb_d_csr_sparse_mat :: altmp, stat=info) @@ -1347,38 +1398,45 @@ contains goto 9999 end select else -!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) end if - + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& & a%get_fmt(),' to ',altmp%get_fmt() - + call altmp%cp_from_fmt(a, info) - + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - + call move_alloc(altmp,b) - + call psb_erractionrestore(err_act) return - - + + 9999 call psb_error_handler(err_act) - - return - end subroutine inner_cp_alloc + return + end subroutine inner_cp_fmt end subroutine psb_d_cscnv subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) @@ -1387,13 +1445,12 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) use psb_d_mat_mod, psb_protect_name => psb_d_cscnv_ip implicit none - class(psb_dspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_d_base_sparse_mat), intent(in), optional :: mold - class(psb_d_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1420,57 +1477,55 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if -!!$ if (present(mold)) then -!!$ -!!$ allocate(altmp, mold=mold,stat=info) -!!$ -!!$ else if (present(type)) then -!!$ -!!$ select case (psb_toupper(type)) -!!$ case ('CSR') -!!$ allocate(psb_d_csr_sparse_mat :: altmp, stat=info) -!!$ case ('COO') -!!$ allocate(psb_d_coo_sparse_mat :: altmp, stat=info) -!!$ case ('CSC') -!!$ allocate(psb_d_csc_sparse_mat :: altmp, stat=info) -!!$ case default -!!$ info = psb_err_format_unknown_ -!!$ call psb_errpush(info,name,a_err=type) -!!$ goto 9999 -!!$ end select -!!$ else -!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) -!!$ end if -!!$ -!!$ if (info /= psb_success_) then -!!$ info = psb_err_alloc_dealloc_ -!!$ call psb_errpush(info,name) -!!$ goto 9999 -!!$ end if -!!$ -!!$ if (debug) write(psb_err_unit,*) 'Converting in-place from ',& -!!$ & a%get_fmt(),' to ',altmp%get_fmt() -!!$ -!!$ call altmp%mv_from_fmt(a%a, info) -!!$ -!!$ if (info /= psb_success_) then -!!$ info = psb_err_from_subroutine_ -!!$ call psb_errpush(info,name,a_err="mv_from") -!!$ goto 9999 -!!$ end if -!!$ -!!$ call move_alloc(altmp,a%a) - - call inner_mv_alloc(a%a,info,type,mold) - if (info /= 0) goto 9999 - if (allocated(a%ad)) then - call inner_mv_alloc(a%ad,info,type,mold) - if (info /= 0) goto 9999 + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) + else + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - if (allocated(a%and)) then - call inner_mv_alloc(a%and,info,type,mold) - if (info /= 0) goto 9999 + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 end if + call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1481,23 +1536,24 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) return contains - subroutine inner_mv_alloc(a,info,type,mold) - class(psb_d_base_sparse_mat), intent(inout), allocatable :: a + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_d_base_sparse_mat), intent(inout), allocatable :: a integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl character(len=*), optional, intent(in) :: type class(psb_d_base_sparse_mat), intent(in), optional :: mold - class(psb_d_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act info = psb_success_ call psb_erractionsave(err_act) - + if (present(mold)) then - + allocate(altmp, mold=mold,stat=info) - + else if (present(type)) then - + select case (psb_toupper(type)) case ('CSR') allocate(psb_d_csr_sparse_mat :: altmp, stat=info) @@ -1511,37 +1567,46 @@ contains goto 9999 end select else -!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) end if - + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& & a%get_fmt(),' to ',altmp%get_fmt() - + call altmp%mv_from_fmt(a, info) - + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - + call move_alloc(altmp,a) - + call psb_erractionrestore(err_act) return - - + + 9999 call psb_error_handler(err_act) - + return - end subroutine inner_mv_alloc + end subroutine inner_mv_fmt + end subroutine psb_d_cscnv_ip diff --git a/base/serial/impl/psb_s_mat_impl.F90 b/base/serial/impl/psb_s_mat_impl.F90 index ce7ce653..c0370774 100644 --- a/base/serial/impl/psb_s_mat_impl.F90 +++ b/base/serial/impl/psb_s_mat_impl.F90 @@ -1213,6 +1213,56 @@ subroutine psb_s_b_csclip(a,b,info,& end subroutine psb_s_b_csclip +subroutine psb_s_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_s_mat_mod, psb_protect_name => psb_s_split_nd + implicit none + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat), allocatable :: aclip + type(psb_s_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_split_nd + subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1296,65 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_s_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_s_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_s_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1364,79 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_s_base_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_s_base_sparse_mat), intent(in), optional :: mold + + class(psb_s_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_s_cscnv subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1445,12 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) use psb_s_mat_mod, psb_protect_name => psb_s_cscnv_ip implicit none - class(psb_sspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_s_base_sparse_mat), intent(in), optional :: mold - class(psb_s_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1477,55 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_s_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_s_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_s_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1535,77 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_s_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_s_base_sparse_mat), intent(in), optional :: mold + class(psb_s_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_s_cscnv_ip diff --git a/base/serial/impl/psb_z_mat_impl.F90 b/base/serial/impl/psb_z_mat_impl.F90 index 2cebf9e7..20815cb0 100644 --- a/base/serial/impl/psb_z_mat_impl.F90 +++ b/base/serial/impl/psb_z_mat_impl.F90 @@ -1213,6 +1213,56 @@ subroutine psb_z_b_csclip(a,b,info,& end subroutine psb_z_b_csclip +subroutine psb_z_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_z_mat_mod, psb_protect_name => psb_z_split_nd + implicit none + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + type(psb_z_coo_sparse_mat) :: acoo + type(psb_z_csr_sparse_mat), allocatable :: aclip + type(psb_z_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_split_nd + subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1296,65 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_z_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_z_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_z_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1364,79 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_z_base_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_z_base_sparse_mat), intent(in), optional :: mold + + class(psb_z_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_z_cscnv subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1445,12 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) use psb_z_mat_mod, psb_protect_name => psb_z_cscnv_ip implicit none - class(psb_zspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_z_base_sparse_mat), intent(in), optional :: mold - class(psb_z_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1477,55 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_z_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_z_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_z_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1535,77 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_z_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_z_base_sparse_mat), intent(in), optional :: mold + class(psb_z_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_z_cscnv_ip diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 8263e309..db8af75a 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -178,41 +178,44 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) end if if (bld_and_) then - block - character(len=1024) :: fname - type(psb_c_coo_sparse_mat) :: acoo - type(psb_c_csr_sparse_mat), allocatable :: aclip - type(psb_c_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.true. - allocate(aclip) - call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - allocate(a%ad,mold=a%a) - call a%ad%mv_from_coo(acoo,info) - call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - if (use_ecsr) then - allocate(andclip) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) - else - allocate(a%and,mold=a%a) - call a%and%mv_from_coo(acoo,info) - end if - if (.false.) then - write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' - open(25,file=fname) - call a%ad%print(25) - close(25) - write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' - open(25,file=fname) - call a%and%print(25) - close(25) - !call andclip%set_cols(n_col) - write(*,*) me,' ',trim(name),' ad ',& - &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col - write(*,*) me,' ',trim(name),' and ',& - &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col - end if - end block +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_c_coo_sparse_mat) :: acoo +!!$ type(psb_c_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_c_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block else if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%and)) deallocate(a%and) diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 6beb0e6f..236568a1 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -178,41 +178,44 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) end if if (bld_and_) then - block - character(len=1024) :: fname - type(psb_d_coo_sparse_mat) :: acoo - type(psb_d_csr_sparse_mat), allocatable :: aclip - type(psb_d_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.true. - allocate(aclip) - call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - allocate(a%ad,mold=a%a) - call a%ad%mv_from_coo(acoo,info) - call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - if (use_ecsr) then - allocate(andclip) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) - else - allocate(a%and,mold=a%a) - call a%and%mv_from_coo(acoo,info) - end if - if (.false.) then - write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' - open(25,file=fname) - call a%ad%print(25) - close(25) - write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' - open(25,file=fname) - call a%and%print(25) - close(25) - !call andclip%set_cols(n_col) - write(*,*) me,' ',trim(name),' ad ',& - &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col - write(*,*) me,' ',trim(name),' and ',& - &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col - end if - end block +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_d_coo_sparse_mat) :: acoo +!!$ type(psb_d_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_d_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block else if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%and)) deallocate(a%and) diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index f273c7f4..110097c5 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -178,41 +178,44 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) end if if (bld_and_) then - block - character(len=1024) :: fname - type(psb_s_coo_sparse_mat) :: acoo - type(psb_s_csr_sparse_mat), allocatable :: aclip - type(psb_s_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.true. - allocate(aclip) - call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - allocate(a%ad,mold=a%a) - call a%ad%mv_from_coo(acoo,info) - call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - if (use_ecsr) then - allocate(andclip) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) - else - allocate(a%and,mold=a%a) - call a%and%mv_from_coo(acoo,info) - end if - if (.false.) then - write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' - open(25,file=fname) - call a%ad%print(25) - close(25) - write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' - open(25,file=fname) - call a%and%print(25) - close(25) - !call andclip%set_cols(n_col) - write(*,*) me,' ',trim(name),' ad ',& - &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col - write(*,*) me,' ',trim(name),' and ',& - &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col - end if - end block +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_s_coo_sparse_mat) :: acoo +!!$ type(psb_s_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_s_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block else if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%and)) deallocate(a%and) diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index 1a381303..2cb53368 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -178,41 +178,44 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) end if if (bld_and_) then - block - character(len=1024) :: fname - type(psb_z_coo_sparse_mat) :: acoo - type(psb_z_csr_sparse_mat), allocatable :: aclip - type(psb_z_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.true. - allocate(aclip) - call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - allocate(a%ad,mold=a%a) - call a%ad%mv_from_coo(acoo,info) - call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - if (use_ecsr) then - allocate(andclip) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) - else - allocate(a%and,mold=a%a) - call a%and%mv_from_coo(acoo,info) - end if - if (.false.) then - write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' - open(25,file=fname) - call a%ad%print(25) - close(25) - write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' - open(25,file=fname) - call a%and%print(25) - close(25) - !call andclip%set_cols(n_col) - write(*,*) me,' ',trim(name),' ad ',& - &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col - write(*,*) me,' ',trim(name),' and ',& - &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col - end if - end block +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_z_coo_sparse_mat) :: acoo +!!$ type(psb_z_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_z_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block else if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%and)) deallocate(a%and) From 097d63147aa9a0c15f2e069865e7881c660e8724 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 24 Jan 2024 17:58:39 +0100 Subject: [PATCH 033/110] Fix cuda dir makefile --- cuda/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cuda/Makefile b/cuda/Makefile index 0f03e359..49c5f0cb 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -47,7 +47,10 @@ COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o OBJS=$(COBJS) $(FOBJS) -lib: objs +lib: objs ilib cudalib spgpulib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs /bin/cp -p *$(.mod) $(MODDIR) @@ -60,9 +63,6 @@ spgpuobjs: spgpulib: $(MAKE) -C spgpu lib -lib: ilib cudalib spgpulib - ar cur $(LIBNAME) $(OBJS) - /bin/cp -p $(LIBNAME) $(LIBDIR) dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o From 6433dc797efac808b0628222d328bcc90d86fe8a Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 2 Feb 2024 15:27:42 +0100 Subject: [PATCH 034/110] Fix CUDA implementation of %set_scal and %zero --- cuda/psb_c_cuda_vect_mod.F90 | 4 +--- cuda/psb_d_cuda_vect_mod.F90 | 4 +--- cuda/psb_i_cuda_vect_mod.F90 | 4 +--- cuda/psb_s_cuda_vect_mod.F90 | 4 +--- cuda/psb_z_cuda_vect_mod.F90 | 4 +--- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index c140dadb..9d84d23b 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -668,8 +668,7 @@ contains implicit none class(psb_c_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=czero - call x%set_host() + call x%set_scal(czero) end subroutine c_cuda_zero subroutine c_cuda_asb_m(n, x, info) @@ -807,7 +806,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 44381c99..120bdce1 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -668,8 +668,7 @@ contains implicit none class(psb_d_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero - call x%set_host() + call x%set_scal(dzero) end subroutine d_cuda_zero subroutine d_cuda_asb_m(n, x, info) @@ -807,7 +806,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 index df8f4113..051c4bd0 100644 --- a/cuda/psb_i_cuda_vect_mod.F90 +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -651,8 +651,7 @@ contains implicit none class(psb_i_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=izero - call x%set_host() + call x%set_scal(izero) end subroutine i_cuda_zero subroutine i_cuda_asb_m(n, x, info) @@ -790,7 +789,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 7778eb50..996ec46d 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -668,8 +668,7 @@ contains implicit none class(psb_s_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=szero - call x%set_host() + call x%set_scal(szero) end subroutine s_cuda_zero subroutine s_cuda_asb_m(n, x, info) @@ -807,7 +806,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 53484911..9da93415 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -668,8 +668,7 @@ contains implicit none class(psb_z_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=zzero - call x%set_host() + call x%set_scal(zzero) end subroutine z_cuda_zero subroutine z_cuda_asb_m(n, x, info) @@ -807,7 +806,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() From 14c4ff0f32ab7f6a92f964e124f1035919644e5d Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 12 Feb 2024 14:15:19 +0100 Subject: [PATCH 035/110] Added new methd for two combined axpbys --- base/modules/psblas/psb_c_psblas_mod.F90 | 14 +++++++++++++ base/modules/psblas/psb_d_psblas_mod.F90 | 14 +++++++++++++ base/modules/psblas/psb_s_psblas_mod.F90 | 14 +++++++++++++ base/modules/psblas/psb_z_psblas_mod.F90 | 14 +++++++++++++ base/modules/serial/psb_c_base_vect_mod.F90 | 19 ++++++++++++++++++ base/modules/serial/psb_c_vect_mod.F90 | 18 +++++++++++++++++ base/modules/serial/psb_d_base_vect_mod.F90 | 19 ++++++++++++++++++ base/modules/serial/psb_d_vect_mod.F90 | 18 +++++++++++++++++ base/modules/serial/psb_s_base_vect_mod.F90 | 19 ++++++++++++++++++ base/modules/serial/psb_s_vect_mod.F90 | 18 +++++++++++++++++ base/modules/serial/psb_z_base_vect_mod.F90 | 19 ++++++++++++++++++ base/modules/serial/psb_z_vect_mod.F90 | 18 +++++++++++++++++ cuda/psb_c_cuda_vect_mod.F90 | 22 +++++++++++++++++++++ cuda/psb_d_cuda_vect_mod.F90 | 22 +++++++++++++++++++++ cuda/psb_s_cuda_vect_mod.F90 | 22 +++++++++++++++++++++ cuda/psb_z_cuda_vect_mod.F90 | 22 +++++++++++++++++++++ test/pargen/psb_d_pde3d.F90 | 8 ++++---- 17 files changed, 296 insertions(+), 4 deletions(-) diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index 98deebd8..d660597a 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface + interface psb_abgdxyx + subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_c_vect_type, psb_cspmat_type + type(psb_c_vect_type), intent (inout) :: x + type(psb_c_vect_type), intent (inout) :: y + type(psb_c_vect_type), intent (inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_cabgdxyz_vect + end interface psb_abgdxyx + interface psb_geamax function psb_camax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_spk_, psb_ipk_, & diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index e4988387..734ed633 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface + interface psb_abgdxyx + subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_vect_type, psb_dspmat_type + type(psb_d_vect_type), intent (inout) :: x + type(psb_d_vect_type), intent (inout) :: y + type(psb_d_vect_type), intent (inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_dabgdxyz_vect + end interface psb_abgdxyx + interface psb_geamax function psb_damax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index 93fe74b9..0f7d29e6 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface + interface psb_abgdxyx + subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_s_vect_type, psb_sspmat_type + type(psb_s_vect_type), intent (inout) :: x + type(psb_s_vect_type), intent (inout) :: y + type(psb_s_vect_type), intent (inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_sabgdxyz_vect + end interface psb_abgdxyx + interface psb_geamax function psb_samax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_spk_, psb_ipk_, & diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index 06be1b82..17674600 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface + interface psb_abgdxyx + subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_z_vect_type, psb_zspmat_type + type(psb_z_vect_type), intent (inout) :: x + type(psb_z_vect_type), intent (inout) :: y + type(psb_z_vect_type), intent (inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_zabgdxyz_vect + end interface psb_abgdxyx + interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index df15e0c9..e4f398a7 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -155,6 +155,8 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => c_base_abgdxyz + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1126,6 +1128,23 @@ contains end subroutine c_base_axpby_a2 + subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + + end subroutine c_base_abgdxyz + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 1a336d11..8b2941ff 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => c_vect_abgdxyz + procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_a => c_vect_mlt_a procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2 @@ -771,6 +773,22 @@ contains end subroutine c_vect_axpby_a2 + subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_vect_type), intent(inout) :: x + class(psb_c_vect_type), intent(inout) :: y + class(psb_c_vect_type), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + + end subroutine c_vect_abgdxyz + + subroutine c_vect_mlt_v(x, y, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 87f5b0e4..7ad2d6e7 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -155,6 +155,8 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => d_base_abgdxyz + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1133,6 +1135,23 @@ contains end subroutine d_base_axpby_a2 + subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_base_vect_type), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + + end subroutine d_base_abgdxyz + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index 88fa3262..ef75be87 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => d_vect_abgdxyz + procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_a => d_vect_mlt_a procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 @@ -778,6 +780,22 @@ contains end subroutine d_vect_axpby_a2 + subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_vect_type), intent(inout) :: x + class(psb_d_vect_type), intent(inout) :: y + class(psb_d_vect_type), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + + end subroutine d_vect_abgdxyz + + subroutine d_vect_mlt_v(x, y, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index fccd846b..4e9c0dd3 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -155,6 +155,8 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => s_base_abgdxyz + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1133,6 +1135,23 @@ contains end subroutine s_base_axpby_a2 + subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_base_vect_type), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + + end subroutine s_base_abgdxyz + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index 7a54ecf0..34479856 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => s_vect_abgdxyz + procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_a => s_vect_mlt_a procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2 @@ -778,6 +780,22 @@ contains end subroutine s_vect_axpby_a2 + subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_vect_type), intent(inout) :: x + class(psb_s_vect_type), intent(inout) :: y + class(psb_s_vect_type), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + + end subroutine s_vect_abgdxyz + + subroutine s_vect_mlt_v(x, y, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 2a14de21..60c3c854 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -155,6 +155,8 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => z_base_abgdxyz + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1126,6 +1128,23 @@ contains end subroutine z_base_axpby_a2 + subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_base_vect_type), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + + end subroutine z_base_abgdxyz + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index e8a34859..54ddfebe 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => z_vect_abgdxyz + procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_a => z_vect_mlt_a procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2 @@ -771,6 +773,22 @@ contains end subroutine z_vect_axpby_a2 + subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_vect_type), intent(inout) :: x + class(psb_z_vect_type), intent(inout) :: y + class(psb_z_vect_type), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + + end subroutine z_vect_abgdxyz + + subroutine z_vect_mlt_v(x, y, info) use psi_serial_mod implicit none diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index c140dadb..db988e56 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -90,6 +90,7 @@ module psb_c_cuda_vect_mod procedure, pass(x) :: dot_a => c_cuda_dot_a procedure, pass(y) :: axpby_v => c_cuda_axpby_v procedure, pass(y) :: axpby_a => c_cuda_axpby_a + procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz procedure, pass(y) :: mlt_v => c_cuda_mlt_v procedure, pass(y) :: mlt_a => c_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 @@ -911,6 +912,27 @@ contains end subroutine c_cuda_axpby_v + + subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + call z%psb_c_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) + + end subroutine c_cuda_abgdxyz + + subroutine c_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 44381c99..7f84807b 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -90,6 +90,7 @@ module psb_d_cuda_vect_mod procedure, pass(x) :: dot_a => d_cuda_dot_a procedure, pass(y) :: axpby_v => d_cuda_axpby_v procedure, pass(y) :: axpby_a => d_cuda_axpby_a + procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz procedure, pass(y) :: mlt_v => d_cuda_mlt_v procedure, pass(y) :: mlt_a => d_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 @@ -911,6 +912,27 @@ contains end subroutine d_cuda_axpby_v + + subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + call z%psb_d_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) + + end subroutine d_cuda_abgdxyz + + subroutine d_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 7778eb50..8858c6d9 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -90,6 +90,7 @@ module psb_s_cuda_vect_mod procedure, pass(x) :: dot_a => s_cuda_dot_a procedure, pass(y) :: axpby_v => s_cuda_axpby_v procedure, pass(y) :: axpby_a => s_cuda_axpby_a + procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz procedure, pass(y) :: mlt_v => s_cuda_mlt_v procedure, pass(y) :: mlt_a => s_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 @@ -911,6 +912,27 @@ contains end subroutine s_cuda_axpby_v + + subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + call z%psb_s_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) + + end subroutine s_cuda_abgdxyz + + subroutine s_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 53484911..a7243ff9 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -90,6 +90,7 @@ module psb_z_cuda_vect_mod procedure, pass(x) :: dot_a => z_cuda_dot_a procedure, pass(y) :: axpby_v => z_cuda_axpby_v procedure, pass(y) :: axpby_a => z_cuda_axpby_a + procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz procedure, pass(y) :: mlt_v => z_cuda_mlt_v procedure, pass(y) :: mlt_a => z_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 @@ -911,6 +912,27 @@ contains end subroutine z_cuda_axpby_v + + subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + call z%psb_z_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) + + end subroutine z_cuda_abgdxyz + + subroutine z_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index 4748569c..6e895c00 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -592,9 +592,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold,bld_and=.false.) + call psb_spasb(a,desc_a,info,mold=amold) else - call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.false.) + call psb_spasb(a,desc_a,info,afmt=afmt) end if end if call psb_barrier(ctxt) @@ -868,8 +868,8 @@ program psb_d_pde3d call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - call psb_print_timers(ctxt) - call psb_exit(ctxt) + + call psb_exit(ctxt) stop 9999 call psb_error(ctxt) From 45f00e6e1963142d6532a02007c910d5ab752e97 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 12 Feb 2024 15:10:58 +0100 Subject: [PATCH 036/110] Fixed comments --- base/modules/serial/psb_c_base_vect_mod.F90 | 23 ++++++++++++++++++--- base/modules/serial/psb_d_base_vect_mod.F90 | 23 ++++++++++++++++++--- base/modules/serial/psb_s_base_vect_mod.F90 | 23 ++++++++++++++++++--- base/modules/serial/psb_z_base_vect_mod.F90 | 23 ++++++++++++++++++--- 4 files changed, 80 insertions(+), 12 deletions(-) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index e4f398a7..793df3bc 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -1020,7 +1020,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine c_base_axpby_v(m,alpha, x, beta, y, info) @@ -1049,7 +1049,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1080,7 +1080,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine c_base_axpby_a(m,alpha, x, beta, y, info) @@ -1128,6 +1128,23 @@ contains end subroutine c_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_c_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 7ad2d6e7..29a2ccd8 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -1027,7 +1027,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine d_base_axpby_v(m,alpha, x, beta, y, info) @@ -1056,7 +1056,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1087,7 +1087,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine d_base_axpby_a(m,alpha, x, beta, y, info) @@ -1135,6 +1135,23 @@ contains end subroutine d_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_d_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 4e9c0dd3..61ae27d2 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -1027,7 +1027,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine s_base_axpby_v(m,alpha, x, beta, y, info) @@ -1056,7 +1056,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1087,7 +1087,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine s_base_axpby_a(m,alpha, x, beta, y, info) @@ -1135,6 +1135,23 @@ contains end subroutine s_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_s_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 60c3c854..53f3ea8e 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -1020,7 +1020,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine z_base_axpby_v(m,alpha, x, beta, y, info) @@ -1049,7 +1049,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1080,7 +1080,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine z_base_axpby_a(m,alpha, x, beta, y, info) @@ -1128,6 +1128,23 @@ contains end subroutine z_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_z_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none From ebc7c6b3b40fe21705db6f4d148128ae14410707 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 12 Feb 2024 16:29:48 +0100 Subject: [PATCH 037/110] Fix call to base%abgdxyz --- cuda/psb_c_cuda_vect_mod.F90 | 2 +- cuda/psb_d_cuda_vect_mod.F90 | 2 +- cuda/psb_s_cuda_vect_mod.F90 | 2 +- cuda/psb_z_cuda_vect_mod.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index db988e56..56cc80e6 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -923,7 +923,7 @@ contains complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_c_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) + call z%psb_c_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) !!$ !!$ if (x%is_dev()) call x%sync() !!$ diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 7f84807b..03e65f91 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -923,7 +923,7 @@ contains real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_d_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) + call z%psb_d_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) !!$ !!$ if (x%is_dev()) call x%sync() !!$ diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 8858c6d9..9616b3a6 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -923,7 +923,7 @@ contains real(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_s_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) + call z%psb_s_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) !!$ !!$ if (x%is_dev()) call x%sync() !!$ diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index a7243ff9..1153fc61 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -923,7 +923,7 @@ contains complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_z_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) + call z%psb_z_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) !!$ !!$ if (x%is_dev()) call x%sync() !!$ From 83ededd02b36c458c2c2ca23ac50b570d6623c8e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 13 Feb 2024 12:54:37 +0100 Subject: [PATCH 038/110] Implementatino of abgd_xyz --- base/modules/auxil/psi_c_serial_mod.f90 | 13 ++ base/modules/auxil/psi_d_serial_mod.f90 | 13 ++ base/modules/auxil/psi_e_serial_mod.f90 | 13 ++ base/modules/auxil/psi_i2_serial_mod.f90 | 13 ++ base/modules/auxil/psi_m_serial_mod.f90 | 13 ++ base/modules/auxil/psi_s_serial_mod.f90 | 13 ++ base/modules/auxil/psi_z_serial_mod.f90 | 13 ++ base/serial/psi_c_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_d_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_e_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_i2_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_m_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_s_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_z_serial_impl.F90 | 225 +++++++++++++++++++++++ 14 files changed, 1666 insertions(+) diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 0fdff04b..6926d6bd 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_c_serial_mod end subroutine psi_caxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_cabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index 0ce14dbb..42185d21 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_d_serial_mod end subroutine psi_daxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_dabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index f0372e01..ffba06fd 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_e_serial_mod end subroutine psi_eaxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_eabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_egthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index 70dd95e1..d61a1146 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_i2_serial_mod end subroutine psi_i2axpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i2abgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index cfd1348e..76131d75 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_m_serial_mod end subroutine psi_maxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_mabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 25c4a7ef..02b96311 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_s_serial_mod end subroutine psi_saxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_sabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index b40cf05a..a86bdd70 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_z_serial_mod end subroutine psi_zaxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_zabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index a3898349..129e8484 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine caxpbyv2 + +subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='cabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == czero) then + if (gamma == czero) then + if (alpha == czero) then + if (delta == czero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = czero + end do + else if (delta /= czero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = delta*z(i) + end do + end if + else if (alpha /= czero) then + if (delta == czero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = czero + end do + else if (delta /= czero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= czero) then + + if (alpha == czero) then + + if (delta == czero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = czero ! gamma*y(i) + end do + + else if (delta /= czero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= czero) then + + if (delta == czero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= czero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= czero) then + + if (gamma == czero) then + if (alpha == czero) then + if (delta == czero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = czero + end do + + else if (delta /= czero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= czero) then + if (delta == czero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = czero + end do + + else if (delta /= czero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= czero) then + if (alpha == czero) then + if (delta == czero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= czero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= czero) then + if (delta == czero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= czero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_cabgdxyz diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index 1b5b1442..bd0c82df 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine daxpbyv2 + +subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='dabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == dzero) then + if (gamma == dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = dzero + end do + else if (delta /= dzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = dzero + end do + else if (delta /= dzero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= dzero) then + + if (alpha == dzero) then + + if (delta == dzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = dzero ! gamma*y(i) + end do + + else if (delta /= dzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + + if (delta == dzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= dzero) then + + if (gamma == dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = dzero + end do + + else if (delta /= dzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = dzero + end do + + else if (delta /= dzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_dabgdxyz diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 9cdcdf0e..8b17aeb8 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine eaxpbyv2 + +subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='eabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == ezero) then + if (gamma == ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = ezero + end do + else if (delta /= ezero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = delta*z(i) + end do + end if + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = ezero + end do + else if (delta /= ezero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= ezero) then + + if (alpha == ezero) then + + if (delta == ezero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = ezero ! gamma*y(i) + end do + + else if (delta /= ezero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + + if (delta == ezero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= ezero) then + + if (gamma == ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = ezero + end do + + else if (delta /= ezero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = ezero + end do + + else if (delta /= ezero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_eabgdxyz diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index d25617a9..9a2c36c6 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine i2axpbyv2 + +subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='i2abgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == i2zero) then + if (gamma == i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = i2zero + end do + else if (delta /= i2zero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = delta*z(i) + end do + end if + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = i2zero + end do + else if (delta /= i2zero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= i2zero) then + + if (alpha == i2zero) then + + if (delta == i2zero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = i2zero ! gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + + if (delta == i2zero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= i2zero) then + + if (gamma == i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = i2zero + end do + + else if (delta /= i2zero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = i2zero + end do + + else if (delta /= i2zero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_i2abgdxyz diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 05c8e60f..dd114a45 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine maxpbyv2 + +subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='mabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == mzero) then + if (gamma == mzero) then + if (alpha == mzero) then + if (delta == mzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = mzero + end do + else if (delta /= mzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= mzero) then + if (delta == mzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = mzero + end do + else if (delta /= mzero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= mzero) then + + if (alpha == mzero) then + + if (delta == mzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = mzero ! gamma*y(i) + end do + + else if (delta /= mzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= mzero) then + + if (delta == mzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= mzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= mzero) then + + if (gamma == mzero) then + if (alpha == mzero) then + if (delta == mzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = mzero + end do + + else if (delta /= mzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= mzero) then + if (delta == mzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = mzero + end do + + else if (delta /= mzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= mzero) then + if (alpha == mzero) then + if (delta == mzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= mzero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= mzero) then + if (delta == mzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= mzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_mabgdxyz diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index 26a57e68..8e2dda0f 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine saxpbyv2 + +subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='sabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == szero) then + if (gamma == szero) then + if (alpha == szero) then + if (delta == szero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = szero + end do + else if (delta /= szero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = delta*z(i) + end do + end if + else if (alpha /= szero) then + if (delta == szero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = szero + end do + else if (delta /= szero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= szero) then + + if (alpha == szero) then + + if (delta == szero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = szero ! gamma*y(i) + end do + + else if (delta /= szero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= szero) then + + if (delta == szero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= szero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= szero) then + + if (gamma == szero) then + if (alpha == szero) then + if (delta == szero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = szero + end do + + else if (delta /= szero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= szero) then + if (delta == szero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = szero + end do + + else if (delta /= szero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= szero) then + if (alpha == szero) then + if (delta == szero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= szero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= szero) then + if (delta == szero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= szero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_sabgdxyz diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 0b15b2d6..c6a7e01d 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine zaxpbyv2 + +subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='zabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == zzero) then + if (gamma == zzero) then + if (alpha == zzero) then + if (delta == zzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = zzero + end do + else if (delta /= zzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= zzero) then + if (delta == zzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = zzero + end do + else if (delta /= zzero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= zzero) then + + if (alpha == zzero) then + + if (delta == zzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = zzero ! gamma*y(i) + end do + + else if (delta /= zzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= zzero) then + + if (delta == zzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= zzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= zzero) then + + if (gamma == zzero) then + if (alpha == zzero) then + if (delta == zzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = zzero + end do + + else if (delta /= zzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= zzero) then + if (delta == zzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = zzero + end do + + else if (delta /= zzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= zzero) then + if (alpha == zzero) then + if (delta == zzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= zzero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= zzero) then + if (delta == zzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= zzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_zabgdxyz From 6c53b6ec79dce2d6d1d0c985f11f015622b9f7fd Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 13 Feb 2024 15:48:43 +0100 Subject: [PATCH 039/110] Fix typo in interface for psb_abgdxyz --- base/modules/psblas/psb_c_psblas_mod.F90 | 4 ++-- base/modules/psblas/psb_d_psblas_mod.F90 | 4 ++-- base/modules/psblas/psb_s_psblas_mod.F90 | 4 ++-- base/modules/psblas/psb_z_psblas_mod.F90 | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index d660597a..7f7f937c 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,7 +143,7 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface - interface psb_abgdxyx + interface psb_abgdxyz subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & @@ -155,7 +155,7 @@ module psb_c_psblas_mod type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_cabgdxyz_vect - end interface psb_abgdxyx + end interface psb_abgdxyz interface psb_geamax function psb_camax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index 734ed633..12090956 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,7 +143,7 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface - interface psb_abgdxyx + interface psb_abgdxyz subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & @@ -155,7 +155,7 @@ module psb_d_psblas_mod type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_dabgdxyz_vect - end interface psb_abgdxyx + end interface psb_abgdxyz interface psb_geamax function psb_damax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index 0f7d29e6..7a7ce783 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,7 +143,7 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface - interface psb_abgdxyx + interface psb_abgdxyz subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & @@ -155,7 +155,7 @@ module psb_s_psblas_mod type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_sabgdxyz_vect - end interface psb_abgdxyx + end interface psb_abgdxyz interface psb_geamax function psb_samax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index 17674600..bcfe9caa 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,7 +143,7 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface - interface psb_abgdxyx + interface psb_abgdxyz subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & @@ -155,7 +155,7 @@ module psb_z_psblas_mod type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_zabgdxyz_vect - end interface psb_abgdxyx + end interface psb_abgdxyz interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) From 29669b56a24868b42c9bee2836fae05f9a57f480 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 13 Feb 2024 16:07:06 +0100 Subject: [PATCH 040/110] Implementation of psb_abgdxyz --- base/psblas/psb_caxpby.f90 | 82 ++++++++++++++++++++++++++++++++++++++ base/psblas/psb_daxpby.f90 | 82 ++++++++++++++++++++++++++++++++++++++ base/psblas/psb_saxpby.f90 | 82 ++++++++++++++++++++++++++++++++++++++ base/psblas/psb_zaxpby.f90 | 82 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 328 insertions(+) diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index da3dd93b..a41e6ef2 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -741,3 +741,85 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info) return end subroutine psb_caddconst_vect + + +subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_c_vect_type, psb_cspmat_type + type(psb_c_vect_type), intent (inout) :: x + type(psb_c_vect_type), intent (inout) :: y + type(psb_c_vect_type), intent (inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_c_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_cabgdxyz_vect + diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index c386f8f2..4805727e 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -741,3 +741,85 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info) return end subroutine psb_daddconst_vect + + +subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_vect_type, psb_dspmat_type + type(psb_d_vect_type), intent (inout) :: x + type(psb_d_vect_type), intent (inout) :: y + type(psb_d_vect_type), intent (inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_d_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_dabgdxyz_vect + diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 78f4d01a..581d64cd 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -741,3 +741,85 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info) return end subroutine psb_saddconst_vect + + +subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_s_vect_type, psb_sspmat_type + type(psb_s_vect_type), intent (inout) :: x + type(psb_s_vect_type), intent (inout) :: y + type(psb_s_vect_type), intent (inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_s_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_sabgdxyz_vect + diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 2258f38f..df13f242 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -741,3 +741,85 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info) return end subroutine psb_zaddconst_vect + + +subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_z_vect_type, psb_zspmat_type + type(psb_z_vect_type), intent (inout) :: x + type(psb_z_vect_type), intent (inout) :: y + type(psb_z_vect_type), intent (inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_z_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_zabgdxyz_vect + From 5c3d5f023582a8b4773ad6df999ee038206d6954 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 13 Feb 2024 16:13:06 +0100 Subject: [PATCH 041/110] Silly bug in abgdxyz implementation --- base/psblas/psb_caxpby.f90 | 4 ++-- base/psblas/psb_daxpby.f90 | 4 ++-- base/psblas/psb_saxpby.f90 | 4 ++-- base/psblas/psb_zaxpby.f90 | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index a41e6ef2..f19f2caf 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -745,8 +745,8 @@ end subroutine psb_caddconst_vect subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - import :: psb_desc_type, psb_spk_, psb_ipk_, & - & psb_c_vect_type, psb_cspmat_type + use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect + implicit none type(psb_c_vect_type), intent (inout) :: x type(psb_c_vect_type), intent (inout) :: y type(psb_c_vect_type), intent (inout) :: z diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 4805727e..690c5080 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -745,8 +745,8 @@ end subroutine psb_daddconst_vect subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - import :: psb_desc_type, psb_dpk_, psb_ipk_, & - & psb_d_vect_type, psb_dspmat_type + use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect + implicit none type(psb_d_vect_type), intent (inout) :: x type(psb_d_vect_type), intent (inout) :: y type(psb_d_vect_type), intent (inout) :: z diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 581d64cd..4b48f363 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -745,8 +745,8 @@ end subroutine psb_saddconst_vect subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - import :: psb_desc_type, psb_spk_, psb_ipk_, & - & psb_s_vect_type, psb_sspmat_type + use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect + implicit none type(psb_s_vect_type), intent (inout) :: x type(psb_s_vect_type), intent (inout) :: y type(psb_s_vect_type), intent (inout) :: z diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index df13f242..6bacacda 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -745,8 +745,8 @@ end subroutine psb_zaddconst_vect subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - import :: psb_desc_type, psb_dpk_, psb_ipk_, & - & psb_z_vect_type, psb_zspmat_type + use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect + implicit none type(psb_z_vect_type), intent (inout) :: x type(psb_z_vect_type), intent (inout) :: y type(psb_z_vect_type), intent (inout) :: z From 3121c435822da38de4e22dfe0a7c99a519728243 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 13 Feb 2024 16:16:13 +0100 Subject: [PATCH 042/110] Silly bug in abgdxyz implementation --- base/psblas/psb_caxpby.f90 | 7 ++++--- base/psblas/psb_daxpby.f90 | 7 ++++--- base/psblas/psb_saxpby.f90 | 7 ++++--- base/psblas/psb_zaxpby.f90 | 7 ++++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index f19f2caf..3351149b 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -757,7 +757,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr character(len=20) :: name, ch_err name='psb_c_addconst_vect' @@ -792,7 +792,8 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ix = ione iy = ione - m = desc_a%get_global_rows() + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() ! check vector correctness call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) @@ -811,7 +812,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 690c5080..8d43b6ac 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -757,7 +757,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr character(len=20) :: name, ch_err name='psb_d_addconst_vect' @@ -792,7 +792,8 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ix = ione iy = ione - m = desc_a%get_global_rows() + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() ! check vector correctness call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) @@ -811,7 +812,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 4b48f363..6a5441cd 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -757,7 +757,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr character(len=20) :: name, ch_err name='psb_s_addconst_vect' @@ -792,7 +792,8 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ix = ione iy = ione - m = desc_a%get_global_rows() + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() ! check vector correctness call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) @@ -811,7 +812,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 6bacacda..75f16ea8 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -757,7 +757,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr character(len=20) :: name, ch_err name='psb_z_addconst_vect' @@ -792,7 +792,8 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ix = ione iy = ione - m = desc_a%get_global_rows() + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() ! check vector correctness call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) @@ -811,7 +812,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) From 9ced67634dc17248602b5871bbc3e2aa6ccdbdb8 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 14 Feb 2024 08:52:38 +0100 Subject: [PATCH 043/110] Fix KIND for NR in axpby --- base/psblas/psb_caxpby.f90 | 4 ++-- base/psblas/psb_daxpby.f90 | 4 ++-- base/psblas/psb_saxpby.f90 | 4 ++-- base/psblas/psb_zaxpby.f90 | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index 3351149b..7c22bb06 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -756,8 +756,8 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ! locals type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m character(len=20) :: name, ch_err name='psb_c_addconst_vect' diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 8d43b6ac..1de77647 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -756,8 +756,8 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ! locals type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m character(len=20) :: name, ch_err name='psb_d_addconst_vect' diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 6a5441cd..1b1f24e6 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -756,8 +756,8 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ! locals type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m character(len=20) :: name, ch_err name='psb_s_addconst_vect' diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 75f16ea8..0f37a1f4 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -756,8 +756,8 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ! locals type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m character(len=20) :: name, ch_err name='psb_z_addconst_vect' From 4e611bb078d54f1eea74e6439db722414c9c269a Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 14 Feb 2024 15:55:55 +0100 Subject: [PATCH 044/110] Enable psi_abgdxyz --- base/modules/serial/psb_c_base_vect_mod.F90 | 15 +++++++++++---- base/modules/serial/psb_c_vect_mod.F90 | 2 +- base/modules/serial/psb_d_base_vect_mod.F90 | 15 +++++++++++---- base/modules/serial/psb_d_vect_mod.F90 | 2 +- base/modules/serial/psb_s_base_vect_mod.F90 | 15 +++++++++++---- base/modules/serial/psb_s_vect_mod.F90 | 2 +- base/modules/serial/psb_z_base_vect_mod.F90 | 15 +++++++++++---- base/modules/serial/psb_z_vect_mod.F90 | 2 +- base/serial/psi_c_serial_impl.F90 | 4 ++-- base/serial/psi_d_serial_impl.F90 | 4 ++-- base/serial/psi_e_serial_impl.F90 | 4 ++-- base/serial/psi_i2_serial_impl.F90 | 4 ++-- base/serial/psi_m_serial_impl.F90 | 4 ++-- base/serial/psi_s_serial_impl.F90 | 4 ++-- base/serial/psi_z_serial_impl.F90 | 4 ++-- 15 files changed, 62 insertions(+), 34 deletions(-) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index 793df3bc..a4772103 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -1155,10 +1155,17 @@ contains complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) + if (.false.) then + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + else + if (x%is_dev().and.(alpha/=czero))) call x%sync() + if (y%is_dev().and.(beta/=czero)) call y%sync() + if (z%is_dev().and.(delta/=czero)) call z%sync() + call psi_cabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + end if end subroutine c_base_abgdxyz diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 8b2941ff..2eebb0da 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -1152,7 +1152,7 @@ contains end if end function c_vect_nrm2_weight - + function c_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 29a2ccd8..59b43fce 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -1162,10 +1162,17 @@ contains real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) + if (.false.) then + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + else + if (x%is_dev().and.(alpha/=dzero))) call x%sync() + if (y%is_dev().and.(beta/=dzero)) call y%sync() + if (z%is_dev().and.(delta/=dzero)) call z%sync() + call psi_dabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + end if end subroutine d_base_abgdxyz diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index ef75be87..bbb966ed 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -1159,7 +1159,7 @@ contains end if end function d_vect_nrm2_weight - + function d_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 61ae27d2..dee48ca5 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -1162,10 +1162,17 @@ contains real(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) + if (.false.) then + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + else + if (x%is_dev().and.(alpha/=szero))) call x%sync() + if (y%is_dev().and.(beta/=szero)) call y%sync() + if (z%is_dev().and.(delta/=szero)) call z%sync() + call psi_sabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + end if end subroutine s_base_abgdxyz diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index 34479856..0ffd199f 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -1159,7 +1159,7 @@ contains end if end function s_vect_nrm2_weight - + function s_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 53f3ea8e..0ab2f945 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -1155,10 +1155,17 @@ contains complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) + if (.false.) then + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + else + if (x%is_dev().and.(alpha/=zzero))) call x%sync() + if (y%is_dev().and.(beta/=zzero)) call y%sync() + if (z%is_dev().and.(delta/=zzero)) call z%sync() + call psi_zabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + end if end subroutine z_base_abgdxyz diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 54ddfebe..1ea1fd4a 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -1152,7 +1152,7 @@ contains end if end function z_vect_nrm2_weight - + function z_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index 129e8484..557220e5 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == czero) then if (gamma == czero) then if (alpha == czero) then @@ -1773,7 +1773,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= czero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index bd0c82df..d423b401 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == dzero) then if (gamma == dzero) then if (alpha == dzero) then @@ -1773,7 +1773,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= dzero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 8b17aeb8..c7977c35 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == ezero) then if (gamma == ezero) then if (alpha == ezero) then @@ -1773,7 +1773,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= ezero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index 9a2c36c6..ce4aff80 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == i2zero) then if (gamma == i2zero) then if (alpha == i2zero) then @@ -1773,7 +1773,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= i2zero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index dd114a45..8d9d19f4 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == mzero) then if (gamma == mzero) then if (alpha == mzero) then @@ -1773,7 +1773,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= mzero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index 8e2dda0f..df251b27 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == szero) then if (gamma == szero) then if (alpha == szero) then @@ -1773,7 +1773,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= szero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index c6a7e01d..44ea5ae7 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == zzero) then if (gamma == zzero) then if (alpha == zzero) then @@ -1773,7 +1773,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= zzero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) From 2a40b82b5830d17dfaa8a731abe76ec8bae5fdba Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 14 Feb 2024 16:01:16 +0100 Subject: [PATCH 045/110] Fix typo in base_vect_mod --- base/modules/serial/psb_c_base_vect_mod.F90 | 2 +- base/modules/serial/psb_d_base_vect_mod.F90 | 2 +- base/modules/serial/psb_s_base_vect_mod.F90 | 2 +- base/modules/serial/psb_z_base_vect_mod.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index a4772103..b158ac64 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -1161,7 +1161,7 @@ contains call y%axpby(m,alpha,x,beta,info) call z%axpby(m,gamma,y,delta,info) else - if (x%is_dev().and.(alpha/=czero))) call x%sync() + if (x%is_dev().and.(alpha/=czero)) call x%sync() if (y%is_dev().and.(beta/=czero)) call y%sync() if (z%is_dev().and.(delta/=czero)) call z%sync() call psi_cabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 59b43fce..f53bc590 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -1168,7 +1168,7 @@ contains call y%axpby(m,alpha,x,beta,info) call z%axpby(m,gamma,y,delta,info) else - if (x%is_dev().and.(alpha/=dzero))) call x%sync() + if (x%is_dev().and.(alpha/=dzero)) call x%sync() if (y%is_dev().and.(beta/=dzero)) call y%sync() if (z%is_dev().and.(delta/=dzero)) call z%sync() call psi_dabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index dee48ca5..12626c72 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -1168,7 +1168,7 @@ contains call y%axpby(m,alpha,x,beta,info) call z%axpby(m,gamma,y,delta,info) else - if (x%is_dev().and.(alpha/=szero))) call x%sync() + if (x%is_dev().and.(alpha/=szero)) call x%sync() if (y%is_dev().and.(beta/=szero)) call y%sync() if (z%is_dev().and.(delta/=szero)) call z%sync() call psi_sabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 0ab2f945..fe990a9e 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -1161,7 +1161,7 @@ contains call y%axpby(m,alpha,x,beta,info) call z%axpby(m,gamma,y,delta,info) else - if (x%is_dev().and.(alpha/=zzero))) call x%sync() + if (x%is_dev().and.(alpha/=zzero)) call x%sync() if (y%is_dev().and.(beta/=zzero)) call y%sync() if (z%is_dev().and.(delta/=zzero)) call z%sync() call psi_zabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) From b8f9badf954aec07365af71b4d7fc1209fcc890a Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 14 Feb 2024 20:05:52 +0100 Subject: [PATCH 046/110] Fix interface between vect and base_vect%ABGD --- base/modules/serial/psb_c_vect_mod.F90 | 2 +- base/modules/serial/psb_d_vect_mod.F90 | 2 +- base/modules/serial/psb_s_vect_mod.F90 | 2 +- base/modules/serial/psb_z_vect_mod.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 2eebb0da..e0488def 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -784,7 +784,7 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) end subroutine c_vect_abgdxyz diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index bbb966ed..07007452 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -791,7 +791,7 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) end subroutine d_vect_abgdxyz diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index 0ffd199f..aa16a04d 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -791,7 +791,7 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) end subroutine s_vect_abgdxyz diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 1ea1fd4a..58bf6b18 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -784,7 +784,7 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) end subroutine z_vect_abgdxyz From f4c7604f610fd91127bc3e68b8467a1638acb301 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 17 Feb 2024 09:40:09 +0100 Subject: [PATCH 047/110] Fix base implementation of abgdxyz to call set_host --- base/modules/serial/psb_c_base_vect_mod.F90 | 2 ++ base/modules/serial/psb_d_base_vect_mod.F90 | 2 ++ base/modules/serial/psb_s_base_vect_mod.F90 | 2 ++ base/modules/serial/psb_z_base_vect_mod.F90 | 2 ++ 4 files changed, 8 insertions(+) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index b158ac64..5a468d55 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -1165,6 +1165,8 @@ contains if (y%is_dev().and.(beta/=czero)) call y%sync() if (z%is_dev().and.(delta/=czero)) call z%sync() call psi_cabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() end if end subroutine c_base_abgdxyz diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index f53bc590..8f583cd3 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -1172,6 +1172,8 @@ contains if (y%is_dev().and.(beta/=dzero)) call y%sync() if (z%is_dev().and.(delta/=dzero)) call z%sync() call psi_dabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() end if end subroutine d_base_abgdxyz diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 12626c72..85bb3bda 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -1172,6 +1172,8 @@ contains if (y%is_dev().and.(beta/=szero)) call y%sync() if (z%is_dev().and.(delta/=szero)) call z%sync() call psi_sabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() end if end subroutine s_base_abgdxyz diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index fe990a9e..b30b1586 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -1165,6 +1165,8 @@ contains if (y%is_dev().and.(beta/=zzero)) call y%sync() if (z%is_dev().and.(delta/=zzero)) call z%sync() call psi_zabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() end if end subroutine z_base_abgdxyz From a41b209144ed25837f1b8c8196c1e4b87569b02a Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 17 Feb 2024 17:18:59 +0100 Subject: [PATCH 048/110] Better AXPBY implementation in CUDA. --- cuda/spgpu/kernels/caxpby.cu | 40 ++++++++++++++++++++++++++++++----- cuda/spgpu/kernels/daxpby.cu | 41 +++++++++++++++++++++++++++++++----- cuda/spgpu/kernels/saxpby.cu | 30 +++++++++++++++++++++++--- cuda/spgpu/kernels/zaxpby.cu | 29 ++++++++++++++++++++++--- 4 files changed, 124 insertions(+), 16 deletions(-) diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu index d3d326ef..16eb87ed 100644 --- a/cuda/spgpu/kernels/caxpby.cu +++ b/cuda/spgpu/kernels/caxpby.cu @@ -32,8 +32,9 @@ extern "C" __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; - - if (id < n) + unsigned int gridSize = blockDim.x * gridDim.x; + for ( ; id < n; id +=gridSize) + //if (id,n) { // Since z, x and y are accessed with the same offset by the same thread, // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). @@ -45,7 +46,30 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c } } +#if 1 +void spgpuCaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex *z, int n, @@ -55,9 +79,15 @@ void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; - + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - dim3 grid(msize); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } @@ -86,7 +116,7 @@ void spgpuCaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on saxpby"); } - +#endif void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu index 83724ce2..a0a163a2 100644 --- a/cuda/spgpu/kernels/daxpby.cu +++ b/cuda/spgpu/kernels/daxpby.cu @@ -16,6 +16,7 @@ #include "cudadebug.h" #include "cudalang.h" +#include extern "C" { @@ -31,8 +32,9 @@ extern "C" __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; - - if (id < n) + unsigned int gridSize = blockDim.x * gridDim.x; + for ( ; id < n; id +=gridSize) + //if (id,n) { // Since z, x and y are accessed with the same offset by the same thread, // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). @@ -44,8 +46,9 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double } } +#if 1 -void spgpuDaxpby_(spgpuHandle_t handle, +void spgpuDaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, @@ -54,9 +57,37 @@ void spgpuDaxpby_(spgpuHandle_t handle, __device double* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else +void spgpuDaxpby_(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - dim3 grid(msize); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } @@ -84,7 +115,7 @@ void spgpuDaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on daxpby"); } - +#endif void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, int n, diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu index 2c46f19e..42e2a7a7 100644 --- a/cuda/spgpu/kernels/saxpby.cu +++ b/cuda/spgpu/kernels/saxpby.cu @@ -30,8 +30,9 @@ extern "C" __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; - - if (id < n) + unsigned int gridSize = blockDim.x * gridDim.x; + for ( ; id < n; id +=gridSize) + //if (id,n) { // Since z, x and y are accessed with the same offset by the same thread, // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). @@ -44,6 +45,29 @@ __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alp } +#if 1 +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else void spgpuSaxpby_(spgpuHandle_t handle, __device float *z, int n, @@ -83,7 +107,7 @@ void spgpuSaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on saxpby"); } - +#endif void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, diff --git a/cuda/spgpu/kernels/zaxpby.cu b/cuda/spgpu/kernels/zaxpby.cu index 7f9d5797..da438fc2 100644 --- a/cuda/spgpu/kernels/zaxpby.cu +++ b/cuda/spgpu/kernels/zaxpby.cu @@ -33,8 +33,9 @@ extern "C" __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; - - if (id < n) + unsigned int gridSize = blockDim.x * gridDim.x; + for ( ; id < n; id +=gridSize) + //if (id,n) { // Since z, x and y are accessed with the same offset by the same thread, // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). @@ -46,7 +47,29 @@ __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, } } +#if 1 +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + spgpuZaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else void spgpuZaxpby_(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, @@ -86,7 +109,7 @@ void spgpuZaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on daxpby"); } - +#endif void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, From 864872ecacff43d40eed244a1044c0eb293db117 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 17 Feb 2024 17:28:32 +0100 Subject: [PATCH 049/110] Intermediate implementation of abgdxyz on cuda --- cuda/psb_d_cuda_vect_mod.F90 | 39 ++++++++++++++++++++++++++++++------ cuda/spgpu/vector.h | 15 +++++++++++++- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 03e65f91..fe5d3a38 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -923,13 +923,40 @@ contains real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_d_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) -!!$ -!!$ if (x%is_dev()) call x%sync() -!!$ -!!$ call y%axpby(m,alpha,x,beta,info) -!!$ call z%axpby(m,gamma,y,delta,info) + + info = psb_success_ + if (.false.) then + + select type(xx => x) + type is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx Date: Sat, 17 Feb 2024 17:46:09 +0100 Subject: [PATCH 050/110] Intermediate impl of ABGDXYZ --- cuda/psb_d_cuda_vect_mod.F90 | 54 +++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index fe5d3a38..36fac14e 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -922,33 +922,47 @@ contains class(psb_d_vect_cuda), intent(inout) :: z real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done info = psb_success_ if (.false.) then - + gpu_done = .false. select type(xx => x) - type is (psb_d_vect_cuda) - ! Do something different here - if ((beta /= dzero).and.y%is_host())& - & call y%sync() - if (xx%is_host()) call xx%sync() - nx = getMultiVecDeviceSize(xx%deviceVect) - ny = getMultiVecDeviceSize(y%deviceVect) - if ((nx y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= dzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx Date: Sat, 17 Feb 2024 18:20:12 +0100 Subject: [PATCH 051/110] New implementation for ABGDXYZ in CUDA --- cuda/dvectordev.c | 24 +++++++++++ cuda/psb_d_cuda_vect_mod.F90 | 3 +- cuda/psb_d_vectordev_mod.F90 | 13 ++++++ cuda/spgpu/kernels/Makefile | 2 +- cuda/spgpu/kernels/dabgdxyz.cu | 79 ++++++++++++++++++++++++++++++++++ 5 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 cuda/spgpu/kernels/dabgdxyz.cu diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 39aa5b2a..785753dd 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -241,6 +241,30 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, return(i); } + +int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + +#if 1 + spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta, + (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_); +#else + for(j=0;jcount_;j++) + spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta, + (double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j); +#endif + return(i); +} + int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 36fac14e..8256eaa0 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -947,7 +947,8 @@ contains if ((nx + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma, double delta, + double* x, double *y, double *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + double t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (beta == 0.0) + t = PREC_DMUL(alpha,x[id]); + else + t = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + if (delta == 0.0) + z[id] = gamma * t; + else + z[id] = PREC_DADD(PREC_DMUL(gamma, t), PREC_DMUL(delta,z[id])); + y[id] = t; + } +} + + +void spgpuDabgdxyz(spgpuHandle_t handle, + int n, + double alpha, + double beta, + double gamma, + double delta, + __device double* x, + __device double* y, + __device double *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuDabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + From f9677bc8920a187bdb0184e2879da80932fe6a55 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 17 Feb 2024 18:42:56 +0100 Subject: [PATCH 052/110] Enabled new CUDA version of ABGDXYZ --- cuda/psb_d_cuda_vect_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 8256eaa0..f2ef2be3 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -927,7 +927,7 @@ contains info = psb_success_ - if (.false.) then + if (.true.) then gpu_done = .false. select type(xx => x) class is (psb_d_vect_cuda) From 1ba8dfc7b7079eed0e86e90539e9b7c7daffe701 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sun, 18 Feb 2024 10:31:32 +0100 Subject: [PATCH 053/110] Switch FOR and IF in AXPBY --- cuda/spgpu/kernels/caxpby.cu | 25 +++++++++++++++---------- cuda/spgpu/kernels/daxpby.cu | 25 +++++++++++++++---------- cuda/spgpu/kernels/saxpby.cu | 25 +++++++++++++++---------- cuda/spgpu/kernels/zaxpby.cu | 25 +++++++++++++++---------- 4 files changed, 60 insertions(+), 40 deletions(-) diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu index 16eb87ed..33deecbc 100644 --- a/cuda/spgpu/kernels/caxpby.cu +++ b/cuda/spgpu/kernels/caxpby.cu @@ -33,16 +33,21 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; unsigned int gridSize = blockDim.x * gridDim.x; - for ( ; id < n; id +=gridSize) - //if (id,n) - { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). - - if (cuFloatComplex_isZero(beta)) - z[id] = cuCmulf(alpha,x[id]); - else - z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id])); + if (cuFloatComplex_isZero(beta)) { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = cuCmulf(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id])); + } } } diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu index a0a163a2..ce7c0dd4 100644 --- a/cuda/spgpu/kernels/daxpby.cu +++ b/cuda/spgpu/kernels/daxpby.cu @@ -33,16 +33,21 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; unsigned int gridSize = blockDim.x * gridDim.x; - for ( ; id < n; id +=gridSize) - //if (id,n) - { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). - - if (beta == 0.0) - z[id] = PREC_DMUL(alpha,x[id]); - else - z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + if (beta == 0.0) { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = PREC_DMUL(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } } } diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu index 42e2a7a7..36c3cdbe 100644 --- a/cuda/spgpu/kernels/saxpby.cu +++ b/cuda/spgpu/kernels/saxpby.cu @@ -31,16 +31,21 @@ __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alp { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; unsigned int gridSize = blockDim.x * gridDim.x; - for ( ; id < n; id +=gridSize) - //if (id,n) - { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). - - if (beta == 0.0f) - z[id] = PREC_FMUL(alpha,x[id]); - else - z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + if (beta == 0.0f) { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = PREC_FMUL(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } } } diff --git a/cuda/spgpu/kernels/zaxpby.cu b/cuda/spgpu/kernels/zaxpby.cu index da438fc2..8aec3e17 100644 --- a/cuda/spgpu/kernels/zaxpby.cu +++ b/cuda/spgpu/kernels/zaxpby.cu @@ -34,16 +34,21 @@ __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; unsigned int gridSize = blockDim.x * gridDim.x; - for ( ; id < n; id +=gridSize) - //if (id,n) - { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). - - if (cuDoubleComplex_isZero(beta)) - z[id] = cuCmul(alpha,x[id]); - else - z[id] = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + if (cuDoubleComplex_isZero(beta)) { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = cuCmul(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + z[id] = cuCfma(beta, y[id], cuCmul(alpha, x[id])); + } } } From 35d68aa4e326dc0b11a18cdd4f15ec62ec3802f4 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sun, 18 Feb 2024 16:44:37 +0100 Subject: [PATCH 054/110] Reuse calls to getDeviceProperties done at init time --- cuda/spgpu/kernels/caxpby.cu | 29 ++++++++++++++++++++++++----- cuda/spgpu/kernels/dabgdxyz.cu | 8 ++++---- cuda/spgpu/kernels/daxpby.cu | 34 ++++++++++++++++++++++++---------- cuda/spgpu/kernels/saxpby.cu | 33 +++++++++++++++++++++++++++------ cuda/spgpu/kernels/zaxpby.cu | 28 +++++++++++++++++++++++----- 5 files changed, 102 insertions(+), 30 deletions(-) diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu index 33deecbc..3e97f75f 100644 --- a/cuda/spgpu/kernels/caxpby.cu +++ b/cuda/spgpu/kernels/caxpby.cu @@ -22,6 +22,9 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" } @@ -29,6 +32,8 @@ extern "C" #define BLOCK_SIZE 512 +#if 1 + __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -51,7 +56,6 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c } } -#if 1 void spgpuCaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, @@ -63,10 +67,8 @@ void spgpuCaxpby(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); @@ -75,6 +77,23 @@ void spgpuCaxpby(spgpuHandle_t handle, } #else + +__global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (cuFloatComplex_isZero(beta)) + z[id] = cuCmulf(alpha,x[id]); + else + z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id])); + } +} + void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex *z, int n, diff --git a/cuda/spgpu/kernels/dabgdxyz.cu b/cuda/spgpu/kernels/dabgdxyz.cu index 525371d3..f2b18e02 100644 --- a/cuda/spgpu/kernels/dabgdxyz.cu +++ b/cuda/spgpu/kernels/dabgdxyz.cu @@ -22,6 +22,8 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); } @@ -65,10 +67,8 @@ void spgpuDabgdxyz(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu index ce7c0dd4..fa87d996 100644 --- a/cuda/spgpu/kernels/daxpby.cu +++ b/cuda/spgpu/kernels/daxpby.cu @@ -22,6 +22,9 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" } @@ -29,6 +32,8 @@ extern "C" #define BLOCK_SIZE 512 + +#if 1 __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -36,23 +41,17 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double if (beta == 0.0) { for ( ; id < n; id +=gridSize) { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). z[id] = PREC_DMUL(alpha,x[id]); } } else { for ( ; id < n; id +=gridSize) { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); } } } -#if 1 - void spgpuDaxpby(spgpuHandle_t handle, __device double *z, int n, @@ -64,10 +63,8 @@ void spgpuDaxpby(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); @@ -75,6 +72,23 @@ void spgpuDaxpby(spgpuHandle_t handle, spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } #else + +__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0) + z[id] = PREC_DMUL(alpha,x[id]); + else + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } +} + void spgpuDaxpby_(spgpuHandle_t handle, __device double *z, int n, diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu index 36c3cdbe..2f06e39c 100644 --- a/cuda/spgpu/kernels/saxpby.cu +++ b/cuda/spgpu/kernels/saxpby.cu @@ -20,6 +20,9 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" } @@ -27,6 +30,8 @@ extern "C" #define BLOCK_SIZE 512 + +#if 1 __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -49,8 +54,6 @@ __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alp } } - -#if 1 void spgpuSaxpby(spgpuHandle_t handle, __device float *z, int n, @@ -62,17 +65,35 @@ void spgpuSaxpby(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } + #else + +__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0f) + z[id] = PREC_FMUL(alpha,x[id]); + else + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } +} + + + void spgpuSaxpby_(spgpuHandle_t handle, __device float *z, int n, diff --git a/cuda/spgpu/kernels/zaxpby.cu b/cuda/spgpu/kernels/zaxpby.cu index 8aec3e17..8efc40d2 100644 --- a/cuda/spgpu/kernels/zaxpby.cu +++ b/cuda/spgpu/kernels/zaxpby.cu @@ -23,6 +23,9 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" } @@ -30,6 +33,7 @@ extern "C" #define BLOCK_SIZE 512 +#if 1 __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -52,7 +56,6 @@ __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, } } -#if 1 void spgpuZaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, @@ -64,10 +67,8 @@ void spgpuZaxpby(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); @@ -75,6 +76,23 @@ void spgpuZaxpby(spgpuHandle_t handle, spgpuZaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } #else +__global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (cuDoubleComplex_isZero(beta)) + z[id] = cuCmul(alpha,x[id]); + else + z[id] = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + } +} + + void spgpuZaxpby_(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, From 0568a83734e7148c6c8676669009103d0f6c6455 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 19 Feb 2024 10:53:23 +0100 Subject: [PATCH 055/110] Fix ifdef and old code --- cuda/spgpu/kernels/caxpby.cu | 12 ++++-------- cuda/spgpu/kernels/daxpby.cu | 12 ++++-------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu index 3e97f75f..817fdf53 100644 --- a/cuda/spgpu/kernels/caxpby.cu +++ b/cuda/spgpu/kernels/caxpby.cu @@ -78,6 +78,7 @@ void spgpuCaxpby(spgpuHandle_t handle, #else + __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -94,6 +95,7 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c } } + void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex *z, int n, @@ -103,15 +105,9 @@ void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; - int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; - num_blocks_mp = max_threads_mp/BLOCK_SIZE; - num_blocks = num_blocks_mp*num_mp; - dim3 grid(num_blocks); + dim3 grid(msize); spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu index fa87d996..e4823b34 100644 --- a/cuda/spgpu/kernels/daxpby.cu +++ b/cuda/spgpu/kernels/daxpby.cu @@ -89,6 +89,7 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double } } + void spgpuDaxpby_(spgpuHandle_t handle, __device double *z, int n, @@ -98,15 +99,9 @@ void spgpuDaxpby_(spgpuHandle_t handle, __device double* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; - int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; - num_blocks_mp = max_threads_mp/BLOCK_SIZE; - num_blocks = num_blocks_mp*num_mp; - dim3 grid(num_blocks); + dim3 grid(msize); spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } @@ -134,6 +129,7 @@ void spgpuDaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on daxpby"); } + #endif void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, From 93c71c43162fb6663cca9c2f0fff0e8f2ff4c47e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 20 Feb 2024 10:25:31 +0100 Subject: [PATCH 056/110] Fix %ZERO() on cuda --- cuda/psb_c_cuda_vect_mod.F90 | 6 ++-- cuda/psb_d_cuda_vect_mod.F90 | 60 ++++++------------------------------ cuda/psb_d_vectordev_mod.F90 | 13 -------- cuda/psb_i_cuda_vect_mod.F90 | 6 ++-- cuda/psb_s_cuda_vect_mod.F90 | 6 ++-- cuda/psb_z_cuda_vect_mod.F90 | 6 ++-- 6 files changed, 21 insertions(+), 76 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 56cc80e6..fca1c616 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -668,9 +668,9 @@ contains use psi_serial_mod implicit none class(psb_c_vect_cuda), intent(inout) :: x - - if (allocated(x%v)) x%v=czero - call x%set_host() + + call x%set_scal(czero) + end subroutine c_cuda_zero subroutine c_cuda_asb_m(n, x, info) diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index f2ef2be3..2220b26c 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -668,9 +668,9 @@ contains use psi_serial_mod implicit none class(psb_d_vect_cuda), intent(inout) :: x - - if (allocated(x%v)) x%v=dzero - call x%set_host() + + call x%set_scal(dzero) + end subroutine d_cuda_zero subroutine d_cuda_asb_m(n, x, info) @@ -922,56 +922,14 @@ contains class(psb_d_vect_cuda), intent(inout) :: z real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nx, ny, nz - logical :: gpu_done - info = psb_success_ + call z%psb_d_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) - if (.true.) then - gpu_done = .false. - select type(xx => x) - class is (psb_d_vect_cuda) - select type(yy => y) - class is (psb_d_vect_cuda) - select type(zz => z) - class is (psb_d_vect_cuda) - ! Do something different here - if ((beta /= dzero).and.yy%is_host())& - & call yy%sync() - if ((delta /= dzero).and.zz%is_host())& - & call zz%sync() - if (xx%is_host()) call xx%sync() - nx = getMultiVecDeviceSize(xx%deviceVect) - ny = getMultiVecDeviceSize(yy%deviceVect) - nz = getMultiVecDeviceSize(zz%deviceVect) - if ((nx Date: Tue, 20 Feb 2024 12:30:07 +0100 Subject: [PATCH 057/110] X_cuda_vect%abgdxyz --- cuda/psb_c_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++------ cuda/psb_d_cuda_vect_mod.F90 | 56 +++++++++++++++++++++++++++---- cuda/psb_i_cuda_vect_mod.F90 | 8 ++--- cuda/psb_s_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++------ cuda/psb_z_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++------ 5 files changed, 216 insertions(+), 40 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index fca1c616..9b3b6fb1 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_c_vect_cuda), intent(inout) :: z complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_c_vect_cuda) + select type(yy => y) + class is (psb_c_vect_cuda) + select type(zz => z) + class is (psb_c_vect_cuda) + ! Do something different here + if ((beta /= czero).and.yy%is_host())& + & call yy%sync() + if ((delta /= czero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_c_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= czero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_c_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= czero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1817,7 +1861,7 @@ contains implicit none class(psb_c_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=czero call x%set_host() end subroutine c_cuda_multi_zero diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 2220b26c..c98d66f6 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_d_vect_cuda), intent(inout) :: z real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_d_vect_cuda) + select type(yy => y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= dzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_i_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= izero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_i_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= izero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1477,7 +1477,7 @@ contains implicit none class(psb_i_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=izero call x%set_host() end subroutine i_cuda_multi_zero diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 80c60bc3..55ed4a7d 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_s_vect_cuda), intent(inout) :: z real(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_s_vect_cuda) + select type(yy => y) + class is (psb_s_vect_cuda) + select type(zz => z) + class is (psb_s_vect_cuda) + ! Do something different here + if ((beta /= szero).and.yy%is_host())& + & call yy%sync() + if ((delta /= szero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_s_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= szero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_s_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= szero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1817,7 +1861,7 @@ contains implicit none class(psb_s_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=szero call x%set_host() end subroutine s_cuda_multi_zero diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 9f801742..2114723b 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_z_vect_cuda), intent(inout) :: z complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_z_vect_cuda) + select type(yy => y) + class is (psb_z_vect_cuda) + select type(zz => z) + class is (psb_z_vect_cuda) + ! Do something different here + if ((beta /= zzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= zzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_z_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= zzero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_z_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= zzero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1817,7 +1861,7 @@ contains implicit none class(psb_z_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=zzero call x%set_host() end subroutine z_cuda_multi_zero From 2a75d677d05da6616103c86e3ba769de16159d34 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 20 Feb 2024 13:04:40 +0100 Subject: [PATCH 058/110] ABGDXYZ in vectordev_mod --- cuda/psb_c_vectordev_mod.F90 | 13 ++++++++++++- cuda/psb_d_vectordev_mod.F90 | 13 ++++++++++++- cuda/psb_s_vectordev_mod.F90 | 13 ++++++++++++- cuda/psb_z_vectordev_mod.F90 | 13 ++++++++++++- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/cuda/psb_c_vectordev_mod.F90 b/cuda/psb_c_vectordev_mod.F90 index b15b2371..88888f61 100644 --- a/cuda/psb_c_vectordev_mod.F90 +++ b/cuda/psb_c_vectordev_mod.F90 @@ -304,7 +304,6 @@ module psb_c_vectordev_mod end function asumMultiVecDeviceFloatComplex end interface - interface axpbyMultiVecDevice function axpbyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,beta,deviceVecB) & & result(res) bind(c,name='axpbyMultiVecDeviceFloatComplex') @@ -316,6 +315,18 @@ module psb_c_vectordev_mod end function axpbyMultiVecDeviceFloatComplex end interface + interface abgdxyzMultiVecDevice + function abgdxyzMultiVecDeviceFloatComplex(n,alpha,beta,gamma,delta,deviceVecX,& + & deviceVecY,deviceVecZ) & + & result(res) bind(c,name='abgdxyzMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + type(c_float_complex), value :: alpha, beta,gamma,delta + type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ + end function abgdxyzMultiVecDeviceFloatComplex + end interface + interface axyMultiVecDevice function axyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,deviceVecB) & & result(res) bind(c,name='axyMultiVecDeviceFloatComplex') diff --git a/cuda/psb_d_vectordev_mod.F90 b/cuda/psb_d_vectordev_mod.F90 index 802add96..176e8a6e 100644 --- a/cuda/psb_d_vectordev_mod.F90 +++ b/cuda/psb_d_vectordev_mod.F90 @@ -304,7 +304,6 @@ module psb_d_vectordev_mod end function asumMultiVecDeviceDouble end interface - interface axpbyMultiVecDevice function axpbyMultiVecDeviceDouble(n,alpha,deviceVecA,beta,deviceVecB) & & result(res) bind(c,name='axpbyMultiVecDeviceDouble') @@ -316,6 +315,18 @@ module psb_d_vectordev_mod end function axpbyMultiVecDeviceDouble end interface + interface abgdxyzMultiVecDevice + function abgdxyzMultiVecDeviceDouble(n,alpha,beta,gamma,delta,deviceVecX,& + & deviceVecY,deviceVecZ) & + & result(res) bind(c,name='abgdxyzMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + type(c_double), value :: alpha, beta,gamma,delta + type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ + end function abgdxyzMultiVecDeviceDouble + end interface + interface axyMultiVecDevice function axyMultiVecDeviceDouble(n,alpha,deviceVecA,deviceVecB) & & result(res) bind(c,name='axyMultiVecDeviceDouble') diff --git a/cuda/psb_s_vectordev_mod.F90 b/cuda/psb_s_vectordev_mod.F90 index 3ecabe70..73bb7445 100644 --- a/cuda/psb_s_vectordev_mod.F90 +++ b/cuda/psb_s_vectordev_mod.F90 @@ -304,7 +304,6 @@ module psb_s_vectordev_mod end function asumMultiVecDeviceFloat end interface - interface axpbyMultiVecDevice function axpbyMultiVecDeviceFloat(n,alpha,deviceVecA,beta,deviceVecB) & & result(res) bind(c,name='axpbyMultiVecDeviceFloat') @@ -316,6 +315,18 @@ module psb_s_vectordev_mod end function axpbyMultiVecDeviceFloat end interface + interface abgdxyzMultiVecDevice + function abgdxyzMultiVecDeviceFloat(n,alpha,beta,gamma,delta,deviceVecX,& + & deviceVecY,deviceVecZ) & + & result(res) bind(c,name='abgdxyzMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + type(c_float), value :: alpha, beta,gamma,delta + type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ + end function abgdxyzMultiVecDeviceFloat + end interface + interface axyMultiVecDevice function axyMultiVecDeviceFloat(n,alpha,deviceVecA,deviceVecB) & & result(res) bind(c,name='axyMultiVecDeviceFloat') diff --git a/cuda/psb_z_vectordev_mod.F90 b/cuda/psb_z_vectordev_mod.F90 index 8f07cd56..fa858acc 100644 --- a/cuda/psb_z_vectordev_mod.F90 +++ b/cuda/psb_z_vectordev_mod.F90 @@ -304,7 +304,6 @@ module psb_z_vectordev_mod end function asumMultiVecDeviceDoubleComplex end interface - interface axpbyMultiVecDevice function axpbyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,beta,deviceVecB) & & result(res) bind(c,name='axpbyMultiVecDeviceDoubleComplex') @@ -316,6 +315,18 @@ module psb_z_vectordev_mod end function axpbyMultiVecDeviceDoubleComplex end interface + interface abgdxyzMultiVecDevice + function abgdxyzMultiVecDeviceDoubleComplex(n,alpha,beta,gamma,delta,deviceVecX,& + & deviceVecY,deviceVecZ) & + & result(res) bind(c,name='abgdxyzMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + type(c_double_complex), value :: alpha, beta,gamma,delta + type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ + end function abgdxyzMultiVecDeviceDoubleComplex + end interface + interface axyMultiVecDevice function axyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,deviceVecB) & & result(res) bind(c,name='axyMultiVecDeviceDoubleComplex') From 2d3773df9887885d4758d4263871c69d0a53c6e4 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 20 Feb 2024 13:05:16 +0100 Subject: [PATCH 059/110] CUDA kernels for ABGDXYZ --- cuda/cvectordev.c | 18 ++++++++ cuda/cvectordev.h | 3 ++ cuda/dvectordev.c | 7 --- cuda/dvectordev.h | 2 + cuda/spgpu/kernels/Makefile | 6 +-- cuda/spgpu/kernels/cabgdxyz.cu | 80 ++++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/sabgdxyz.cu | 79 +++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/zabgdxyz.cu | 80 ++++++++++++++++++++++++++++++++++ cuda/spgpu/vector.h | 36 +++++++++++++++ cuda/svectordev.c | 17 ++++++++ cuda/svectordev.h | 2 + cuda/zvectordev.c | 18 ++++++++ cuda/zvectordev.h | 3 ++ 13 files changed, 341 insertions(+), 10 deletions(-) create mode 100644 cuda/spgpu/kernels/cabgdxyz.cu create mode 100644 cuda/spgpu/kernels/sabgdxyz.cu create mode 100644 cuda/spgpu/kernels/zabgdxyz.cu diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index 518154d5..9db5202e 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -255,6 +255,24 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe return(i); } +int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCabgdxyz(handle,n, alpha,beta,gamma,delta, + (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,(cuFloatComplex *) devVecZ->v_); + return(i); +} + int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB) { int i = 0; diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h index 27c8984a..fc18e328 100644 --- a/cuda/cvectordev.h +++ b/cuda/cvectordev.h @@ -69,6 +69,9 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY); +int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB, cuFloatComplex beta, void *deviceVecZ); diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 785753dd..b4ca95f4 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -241,7 +241,6 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, return(i); } - int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; @@ -254,14 +253,8 @@ int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, do if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; -#if 1 spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta, (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_); -#else - for(j=0;jcount_;j++) - spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta, - (double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j); -#endif return(i); } diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h index 25905c60..81a2e8f6 100644 --- a/cuda/dvectordev.h +++ b/cuda/dvectordev.h @@ -67,6 +67,8 @@ int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA); int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); +int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB, double beta, void *deviceVecZ); diff --git a/cuda/spgpu/kernels/Makefile b/cuda/spgpu/kernels/Makefile index 5ada698a..3e668b4e 100644 --- a/cuda/spgpu/kernels/Makefile +++ b/cuda/spgpu/kernels/Makefile @@ -11,15 +11,15 @@ LIBNAME=$(UP)/libspgpu.a CINCLUDES=-I$(INCDIR) OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \ - cnrm2.o cscal.o cscat.o csetscal.o \ + cnrm2.o cscal.o cscat.o csetscal.o cabgdxyz.o\ dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o dabgdxyz.o\ dia_cspmv.o dia_dspmv.o dia_sspmv.o dia_zspmv.o dnrm2.o \ dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o \ ell_dcsput.o ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \ hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \ hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \ - saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o \ - zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o + saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o sabgdxyz.o\ + zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zabgdxyz.o objs: $(OBJS) lib: objs diff --git a/cuda/spgpu/kernels/cabgdxyz.cu b/cuda/spgpu/kernels/cabgdxyz.cu new file mode 100644 index 00000000..00dc6ab4 --- /dev/null +++ b/cuda/spgpu/kernels/cabgdxyz.cu @@ -0,0 +1,80 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex alpha, cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + cuFloatComplex * x, cuFloatComplex *y, cuFloatComplex *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuFloatComplex t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (cuFloatComplex_isZero(beta)) + t = cuCmulf(alpha,x[id]); + else + t = cuCfmaf(alpha, x[id], cuCmulf(beta,y[id])); + if (cuFloatComplex_isZero(delta)) + z[id] = cuCmulf(gamma, t); + else + z[id] = cuCfmafmulf(gamma, t, cuCmulf(delta,z[id])); + y[id] = t; + } +} + + +void spgpuCabgdxyz(spgpuHandle_t handle, + int n, + cuFloatComplex alpha, + cuFloatComplex beta, + cuFloatComplex gamma, + cuFloatComplex delta, + __device cuFloatComplex * x, + __device cuFloatComplex * y, + __device cuFloatComplex *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/sabgdxyz.cu b/cuda/spgpu/kernels/sabgdxyz.cu new file mode 100644 index 00000000..8c137ed3 --- /dev/null +++ b/cuda/spgpu/kernels/sabgdxyz.cu @@ -0,0 +1,79 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, float delta, + float* x, float *y, float *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + float t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (beta == 0.0) + t = PREC_FMUL(alpha,x[id]); + else + t = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + if (delta == 0.0) + z[id] = gamma * t; + else + z[id] = PREC_FADD(PREC_FMUL(gamma, t), PREC_FMUL(delta,z[id])); + y[id] = t; + } +} + + +void spgpuSabgdxyz(spgpuHandle_t handle, + int n, + float alpha, + float beta, + float gamma, + float delta, + __device float* x, + __device float* y, + __device float *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/zabgdxyz.cu b/cuda/spgpu/kernels/zabgdxyz.cu new file mode 100644 index 00000000..48def937 --- /dev/null +++ b/cuda/spgpu/kernels/zabgdxyz.cu @@ -0,0 +1,80 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta, + cuDoubleComplex gamma, cuDoubleComplex delta, + cuDoubleComplex * x, cuDoubleComplex *y, cuDoubleComplex *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuDoubleComplex t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (cuDoubleComplex_isZero(beta)) + t = cuCmul(alpha,x[id]); + else + t = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + if (cuDoubleComplex_isZero(delta)) + z[id] = cuCmul(gamma, t); + else + z[id] = cuCfma(gamma, t, cuCmul(delta,z[id])); + y[id] = t; + } +} + + +void spgpuZabgdxyz(spgpuHandle_t handle, + int n, + cuDoubleComplex alpha, + cuDoubleComplex beta, + cuDoubleComplex gamma, + cuDoubleComplex delta, + __device cuDoubleComplex * x, + __device cuDoubleComplex * y, + __device cuDoubleComplex *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/vector.h b/cuda/spgpu/vector.h index 69ffedf0..9fc3e658 100644 --- a/cuda/spgpu/vector.h +++ b/cuda/spgpu/vector.h @@ -181,6 +181,18 @@ void spgpuSaxpby(spgpuHandle_t handle, float alpha, __device float* x); + +void spgpuSabgdxyz(spgpuHandle_t handle, + int n, + float alpha, + float beta, + float gamma, + float delta, + __device float* x, + __device float *y, + __device float *z) +; + /** * \fn void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x, int count, int pitch) * Computes the single precision z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. @@ -755,6 +767,18 @@ void spgpuCaxpby(spgpuHandle_t handle, cuFloatComplex alpha, __device cuFloatComplex* x); + +void spgpuCabgdxyz(spgpuHandle_t handle, + int n, + cuFloatComplex alpha, + cuFloatComplex beta, + cuFloatComplex gamma, + cuFloatComplex delta, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + __device cuFloatComplex *z) +; + /** * \fn void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x, int count, int pitch) * Computes the single precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. @@ -1034,6 +1058,18 @@ void spgpuZaxpby(spgpuHandle_t handle, cuDoubleComplex alpha, __device cuDoubleComplex* x); + +void spgpuZabgdxyz(spgpuHandle_t handle, + int n, + cuDoubleComplex alpha, + cuDoubleComplex beta, + cuDoubleComplex gamma, + cuDoubleComplex delta, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + __device cuDoubleComplex *z) +; + /** * \fn void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex* x, int count, int pitch) * Computes the double precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. diff --git a/cuda/svectordev.c b/cuda/svectordev.c index bfa4061a..b84718f5 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -241,6 +241,23 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, return(i); } +int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSabgdxyz(handle,n, alpha,beta,gamma,delta, + (float*)devVecX->v_,(float*) devVecY->v_,(float*) devVecZ->v_); + return(i); +} + int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; diff --git a/cuda/svectordev.h b/cuda/svectordev.h index bf25fcb1..730f929a 100644 --- a/cuda/svectordev.h +++ b/cuda/svectordev.h @@ -67,6 +67,8 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA); int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); +int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB, float beta, void *deviceVecZ); diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index 0fb1c67e..d1f23f2a 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -234,6 +234,24 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti return(i); } +int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, + cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZabgdxyz(handle,n, alpha,beta,gamma,delta, + (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_); + return(i); +} + int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX, cuDoubleComplex beta, void* devMultiVecY) { int j=0, i=0; diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h index 96330a7a..4c32f11c 100644 --- a/cuda/zvectordev.h +++ b/cuda/zvectordev.h @@ -77,6 +77,9 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX, cuDoubleComplex beta, void* devVecY); +int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, + cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, From d95077ffd66389d11e5782ca76f26432c05cc270 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 20 Feb 2024 13:46:58 +0100 Subject: [PATCH 060/110] Fix typo in vectordev_mod --- cuda/psb_c_vectordev_mod.F90 | 4 +--- cuda/psb_d_vectordev_mod.F90 | 4 +--- cuda/psb_i_vectordev_mod.F90 | 2 -- cuda/psb_s_vectordev_mod.F90 | 4 +--- cuda/psb_z_vectordev_mod.F90 | 4 +--- 5 files changed, 4 insertions(+), 14 deletions(-) diff --git a/cuda/psb_c_vectordev_mod.F90 b/cuda/psb_c_vectordev_mod.F90 index 88888f61..20a4ac3f 100644 --- a/cuda/psb_c_vectordev_mod.F90 +++ b/cuda/psb_c_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_c_vectordev_mod use psb_base_vectordev_mod @@ -322,7 +320,7 @@ module psb_c_vectordev_mod use iso_c_binding integer(c_int) :: res integer(c_int), value :: n - type(c_float_complex), value :: alpha, beta,gamma,delta + complex(c_float_complex), value :: alpha, beta,gamma,delta type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ end function abgdxyzMultiVecDeviceFloatComplex end interface diff --git a/cuda/psb_d_vectordev_mod.F90 b/cuda/psb_d_vectordev_mod.F90 index 176e8a6e..080b27fe 100644 --- a/cuda/psb_d_vectordev_mod.F90 +++ b/cuda/psb_d_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_d_vectordev_mod use psb_base_vectordev_mod @@ -322,7 +320,7 @@ module psb_d_vectordev_mod use iso_c_binding integer(c_int) :: res integer(c_int), value :: n - type(c_double), value :: alpha, beta,gamma,delta + real(c_double), value :: alpha, beta,gamma,delta type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ end function abgdxyzMultiVecDeviceDouble end interface diff --git a/cuda/psb_i_vectordev_mod.F90 b/cuda/psb_i_vectordev_mod.F90 index 84037aaf..ce3dc5e1 100644 --- a/cuda/psb_i_vectordev_mod.F90 +++ b/cuda/psb_i_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_i_vectordev_mod use psb_base_vectordev_mod diff --git a/cuda/psb_s_vectordev_mod.F90 b/cuda/psb_s_vectordev_mod.F90 index 73bb7445..19776cbc 100644 --- a/cuda/psb_s_vectordev_mod.F90 +++ b/cuda/psb_s_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_s_vectordev_mod use psb_base_vectordev_mod @@ -322,7 +320,7 @@ module psb_s_vectordev_mod use iso_c_binding integer(c_int) :: res integer(c_int), value :: n - type(c_float), value :: alpha, beta,gamma,delta + real(c_float), value :: alpha, beta,gamma,delta type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ end function abgdxyzMultiVecDeviceFloat end interface diff --git a/cuda/psb_z_vectordev_mod.F90 b/cuda/psb_z_vectordev_mod.F90 index fa858acc..07e4ba37 100644 --- a/cuda/psb_z_vectordev_mod.F90 +++ b/cuda/psb_z_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_z_vectordev_mod use psb_base_vectordev_mod @@ -322,7 +320,7 @@ module psb_z_vectordev_mod use iso_c_binding integer(c_int) :: res integer(c_int), value :: n - type(c_double_complex), value :: alpha, beta,gamma,delta + complex(c_double_complex), value :: alpha, beta,gamma,delta type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ end function abgdxyzMultiVecDeviceDoubleComplex end interface From 0e269ed6418dd4e953ce57b6b04ce74b585f5a3e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 20 Feb 2024 13:57:32 +0100 Subject: [PATCH 061/110] typo in Cabgdxyz --- cuda/spgpu/kernels/cabgdxyz.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/spgpu/kernels/cabgdxyz.cu b/cuda/spgpu/kernels/cabgdxyz.cu index 00dc6ab4..a85b3873 100644 --- a/cuda/spgpu/kernels/cabgdxyz.cu +++ b/cuda/spgpu/kernels/cabgdxyz.cu @@ -49,7 +49,7 @@ __global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex alpha, cuFloatComplex if (cuFloatComplex_isZero(delta)) z[id] = cuCmulf(gamma, t); else - z[id] = cuCfmafmulf(gamma, t, cuCmulf(delta,z[id])); + z[id] = cuCfmaf(gamma, t, cuCmulf(delta,z[id])); y[id] = t; } } From b5d5f9766107f1d2b6c5a61885fab6df388b4cda Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 20 Feb 2024 14:04:23 +0100 Subject: [PATCH 062/110] Improve cuda%zero() --- cuda/psb_c_cuda_vect_mod.F90 | 4 +++- cuda/psb_d_cuda_vect_mod.F90 | 4 +++- cuda/psb_i_cuda_vect_mod.F90 | 4 +++- cuda/psb_s_cuda_vect_mod.F90 | 4 +++- cuda/psb_z_cuda_vect_mod.F90 | 4 +++- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 9b3b6fb1..7eee128f 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -668,7 +668,9 @@ contains use psi_serial_mod implicit none class(psb_c_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(czero) end subroutine c_cuda_zero diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index c98d66f6..1e6e9f2a 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -668,7 +668,9 @@ contains use psi_serial_mod implicit none class(psb_d_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(dzero) end subroutine d_cuda_zero diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 index a018713e..903c4a08 100644 --- a/cuda/psb_i_cuda_vect_mod.F90 +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -650,7 +650,9 @@ contains use psi_serial_mod implicit none class(psb_i_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(izero) end subroutine i_cuda_zero diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 55ed4a7d..3497c33e 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -668,7 +668,9 @@ contains use psi_serial_mod implicit none class(psb_s_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(szero) end subroutine s_cuda_zero diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 2114723b..8483544c 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -668,7 +668,9 @@ contains use psi_serial_mod implicit none class(psb_z_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(zzero) end subroutine z_cuda_zero From 86be8ebcd0452b108be6580d8af58fa55692a08a Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 4 Mar 2024 16:29:47 +0100 Subject: [PATCH 063/110] New method W%XYZW() --- base/modules/auxil/psi_c_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_d_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_e_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_i2_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_m_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_s_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_z_serial_mod.f90 | 14 ++++ base/modules/serial/psb_c_base_vect_mod.F90 | 44 +++++++++---- base/modules/serial/psb_c_vect_mod.F90 | 17 +++++ base/modules/serial/psb_d_base_vect_mod.F90 | 44 +++++++++---- base/modules/serial/psb_d_vect_mod.F90 | 17 +++++ base/modules/serial/psb_s_base_vect_mod.F90 | 44 +++++++++---- base/modules/serial/psb_s_vect_mod.F90 | 17 +++++ base/modules/serial/psb_z_base_vect_mod.F90 | 44 +++++++++---- base/modules/serial/psb_z_vect_mod.F90 | 17 +++++ base/serial/psi_c_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_d_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_e_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_i2_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_m_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_s_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_z_serial_impl.F90 | 72 +++++++++++++++++++++ 22 files changed, 790 insertions(+), 56 deletions(-) diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 6926d6bd..38b740a7 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_c_serial_mod end subroutine psi_cabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (inout) :: w(:) + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_cxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index 42185d21..1d65c5f6 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_d_serial_mod end subroutine psi_dabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (inout) :: w(:) + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_dxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index ffba06fd..6f4e8c06 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_e_serial_mod end subroutine psi_eabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (inout) :: w(:) + integer(psb_epk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_exyzw + end interface psi_xyzw + interface psi_gth subroutine psi_egthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index d61a1146..ffa14059 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_i2_serial_mod end subroutine psi_i2abgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (inout) :: w(:) + integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i2xyzw + end interface psi_xyzw + interface psi_gth subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index 76131d75..5661fdbf 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_m_serial_mod end subroutine psi_mabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (inout) :: w(:) + integer(psb_mpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_mxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 02b96311..5cc17d58 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_s_serial_mod end subroutine psi_sabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (inout) :: w(:) + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_sxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index a86bdd70..8a3f053d 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_z_serial_mod end subroutine psi_zabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (inout) :: w(:) + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_zxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index 5a468d55..41bab5ab 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -156,6 +156,7 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => c_base_abgdxyz + procedure, pass(w) :: xyzw => c_base_xyzw ! ! Vector by vector multiplication. Need all variants @@ -1155,22 +1156,37 @@ contains complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (.false.) then - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) - else - if (x%is_dev().and.(alpha/=czero)) call x%sync() - if (y%is_dev().and.(beta/=czero)) call y%sync() - if (z%is_dev().and.(delta/=czero)) call z%sync() - call psi_cabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) - call y%set_host() - call z%set_host() - end if - + if (x%is_dev().and.(alpha/=czero)) call x%sync() + if (y%is_dev().and.(beta/=czero)) call y%sync() + if (z%is_dev().and.(delta/=czero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + end subroutine c_base_abgdxyz + subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + class(psb_c_base_vect_type), intent(inout) :: w + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=czero)) call x%sync() + if (y%is_dev().and.(b/=czero)) call y%sync() + if (z%is_dev().and.(d/=czero)) call z%sync() + if (w%is_dev().and.(f/=czero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine c_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index e0488def..865f9456 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -103,6 +103,7 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => c_vect_abgdxyz + procedure, pass(z) :: xyzw => c_vect_xyzw procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_a => c_vect_mlt_a @@ -788,6 +789,22 @@ contains end subroutine c_vect_abgdxyz + subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_vect_type), intent(inout) :: x + class(psb_c_vect_type), intent(inout) :: y + class(psb_c_vect_type), intent(inout) :: z + class(psb_c_vect_type), intent(inout) :: w + complex(psb_spk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine c_vect_xyzw + subroutine c_vect_mlt_v(x, y, info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 8f583cd3..1ad1ffa5 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -156,6 +156,7 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => d_base_abgdxyz + procedure, pass(w) :: xyzw => d_base_xyzw ! ! Vector by vector multiplication. Need all variants @@ -1162,22 +1163,37 @@ contains real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (.false.) then - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) - else - if (x%is_dev().and.(alpha/=dzero)) call x%sync() - if (y%is_dev().and.(beta/=dzero)) call y%sync() - if (z%is_dev().and.(delta/=dzero)) call z%sync() - call psi_dabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) - call y%set_host() - call z%set_host() - end if - + if (x%is_dev().and.(alpha/=dzero)) call x%sync() + if (y%is_dev().and.(beta/=dzero)) call y%sync() + if (z%is_dev().and.(delta/=dzero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + end subroutine d_base_abgdxyz + subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_base_vect_type), intent(inout) :: z + class(psb_d_base_vect_type), intent(inout) :: w + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=dzero)) call x%sync() + if (y%is_dev().and.(b/=dzero)) call y%sync() + if (z%is_dev().and.(d/=dzero)) call z%sync() + if (w%is_dev().and.(f/=dzero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine d_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index 07007452..55dd8230 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -103,6 +103,7 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => d_vect_abgdxyz + procedure, pass(z) :: xyzw => d_vect_xyzw procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_a => d_vect_mlt_a @@ -795,6 +796,22 @@ contains end subroutine d_vect_abgdxyz + subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_vect_type), intent(inout) :: x + class(psb_d_vect_type), intent(inout) :: y + class(psb_d_vect_type), intent(inout) :: z + class(psb_d_vect_type), intent(inout) :: w + real(psb_dpk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine d_vect_xyzw + subroutine d_vect_mlt_v(x, y, info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 85bb3bda..26b82c31 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -156,6 +156,7 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => s_base_abgdxyz + procedure, pass(w) :: xyzw => s_base_xyzw ! ! Vector by vector multiplication. Need all variants @@ -1162,22 +1163,37 @@ contains real(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (.false.) then - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) - else - if (x%is_dev().and.(alpha/=szero)) call x%sync() - if (y%is_dev().and.(beta/=szero)) call y%sync() - if (z%is_dev().and.(delta/=szero)) call z%sync() - call psi_sabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) - call y%set_host() - call z%set_host() - end if - + if (x%is_dev().and.(alpha/=szero)) call x%sync() + if (y%is_dev().and.(beta/=szero)) call y%sync() + if (z%is_dev().and.(delta/=szero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + end subroutine s_base_abgdxyz + subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_base_vect_type), intent(inout) :: z + class(psb_s_base_vect_type), intent(inout) :: w + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=szero)) call x%sync() + if (y%is_dev().and.(b/=szero)) call y%sync() + if (z%is_dev().and.(d/=szero)) call z%sync() + if (w%is_dev().and.(f/=szero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine s_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index aa16a04d..a50b2a0a 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -103,6 +103,7 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => s_vect_abgdxyz + procedure, pass(z) :: xyzw => s_vect_xyzw procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_a => s_vect_mlt_a @@ -795,6 +796,22 @@ contains end subroutine s_vect_abgdxyz + subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_vect_type), intent(inout) :: x + class(psb_s_vect_type), intent(inout) :: y + class(psb_s_vect_type), intent(inout) :: z + class(psb_s_vect_type), intent(inout) :: w + real(psb_spk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine s_vect_xyzw + subroutine s_vect_mlt_v(x, y, info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index b30b1586..a3afc9c1 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -156,6 +156,7 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => z_base_abgdxyz + procedure, pass(w) :: xyzw => z_base_xyzw ! ! Vector by vector multiplication. Need all variants @@ -1155,22 +1156,37 @@ contains complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (.false.) then - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) - else - if (x%is_dev().and.(alpha/=zzero)) call x%sync() - if (y%is_dev().and.(beta/=zzero)) call y%sync() - if (z%is_dev().and.(delta/=zzero)) call z%sync() - call psi_zabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) - call y%set_host() - call z%set_host() - end if - + if (x%is_dev().and.(alpha/=zzero)) call x%sync() + if (y%is_dev().and.(beta/=zzero)) call y%sync() + if (z%is_dev().and.(delta/=zzero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + end subroutine z_base_abgdxyz + subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_base_vect_type), intent(inout) :: z + class(psb_z_base_vect_type), intent(inout) :: w + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=zzero)) call x%sync() + if (y%is_dev().and.(b/=zzero)) call y%sync() + if (z%is_dev().and.(d/=zzero)) call z%sync() + if (w%is_dev().and.(f/=zzero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine z_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 58bf6b18..21e0c546 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -103,6 +103,7 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => z_vect_abgdxyz + procedure, pass(z) :: xyzw => z_vect_xyzw procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_a => z_vect_mlt_a @@ -788,6 +789,22 @@ contains end subroutine z_vect_abgdxyz + subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_vect_type), intent(inout) :: x + class(psb_z_vect_type), intent(inout) :: y + class(psb_z_vect_type), intent(inout) :: z + class(psb_z_vect_type), intent(inout) :: w + complex(psb_dpk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine z_vect_xyzw + subroutine z_vect_mlt_v(x, y, info) use psi_serial_mod diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index 557220e5..e230a1e0 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_cabgdxyz + +subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (inout) :: w(:) + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='cabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==czero).or.(b==czero).or. & + & (c==czero).or.(d==czero).or.& + & (e==czero).or.(f==czero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_cxyzw diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index d423b401..bf1b2917 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_dabgdxyz + +subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (inout) :: w(:) + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='dabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==dzero).or.(b==dzero).or. & + & (c==dzero).or.(d==dzero).or.& + & (e==dzero).or.(f==dzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_dxyzw diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index c7977c35..911ab4ec 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_eabgdxyz + +subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (inout) :: w(:) + integer(psb_epk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='eabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==ezero).or.(b==ezero).or. & + & (c==ezero).or.(d==ezero).or.& + & (e==ezero).or.(f==ezero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_exyzw diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index ce4aff80..fb42dfcd 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_i2abgdxyz + +subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (inout) :: w(:) + integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='i2abgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==i2zero).or.(b==i2zero).or. & + & (c==i2zero).or.(d==i2zero).or.& + & (e==i2zero).or.(f==i2zero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_i2xyzw diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 8d9d19f4..346fd897 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_mabgdxyz + +subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (inout) :: w(:) + integer(psb_mpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='mabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==mzero).or.(b==mzero).or. & + & (c==mzero).or.(d==mzero).or.& + & (e==mzero).or.(f==mzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_mxyzw diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index df251b27..52f86bcd 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_sabgdxyz + +subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (inout) :: w(:) + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='sabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==szero).or.(b==szero).or. & + & (c==szero).or.(d==szero).or.& + & (e==szero).or.(f==szero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_sxyzw diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 44ea5ae7..7e680273 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_zabgdxyz + +subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (inout) :: w(:) + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='zabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==zzero).or.(b==zzero).or. & + & (c==zzero).or.(d==zzero).or.& + & (e==zzero).or.(f==zzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_zxyzw From a11f328e62785f3e1684fb667b4a512ce7f4e77e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 5 Mar 2024 12:42:21 +0100 Subject: [PATCH 064/110] Added CUDA version of XYZW --- cuda/cvectordev.c | 22 +++++++++++++ cuda/cvectordev.h | 5 +++ cuda/dvectordev.c | 19 +++++++++++ cuda/dvectordev.h | 3 ++ cuda/psb_c_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++++++-- cuda/psb_c_vectordev_mod.F90 | 12 +++++++ cuda/psb_d_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++++++-- cuda/psb_d_vectordev_mod.F90 | 12 +++++++ cuda/psb_s_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++++++-- cuda/psb_s_vectordev_mod.F90 | 12 +++++++ cuda/psb_z_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++++++-- cuda/psb_z_vectordev_mod.F90 | 12 +++++++ cuda/spgpu/kernels/Makefile | 3 +- cuda/spgpu/vector.h | 46 ++++++++++++++++++++++++++ cuda/svectordev.c | 21 ++++++++++++ cuda/svectordev.h | 3 ++ cuda/zvectordev.c | 24 +++++++++++++- cuda/zvectordev.h | 5 +++ 18 files changed, 445 insertions(+), 10 deletions(-) diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index 9db5202e..cdfda481 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -273,6 +273,28 @@ int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex return(i); } +int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCxyzw(handle,n, a,b,c,d,e,f, + (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_, + (cuFloatComplex *) devVecZ->v_,(cuFloatComplex *) devVecW->v_); + return(i); +} + int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB) { int i = 0; diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h index fc18e328..62693e27 100644 --- a/cuda/cvectordev.h +++ b/cuda/cvectordev.h @@ -72,6 +72,11 @@ int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, cuFloatComplex gamma, cuFloatComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB, cuFloatComplex beta, void *deviceVecZ); diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index b4ca95f4..723f48d8 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -258,6 +258,25 @@ int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, do return(i); } +int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDxyzw(handle,n, a,b,c,d,e,f, + (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_,(double*) devVecW->v_); + return(i); +} + int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h index 81a2e8f6..c2bfa1b5 100644 --- a/cuda/dvectordev.h +++ b/cuda/dvectordev.h @@ -69,6 +69,9 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB, double beta, void *deviceVecZ); diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 7eee128f..727249df 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -914,7 +914,6 @@ contains end subroutine c_cuda_axpby_v - subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none @@ -975,9 +974,70 @@ contains call z%axpby(m,gamma,y,delta,info) end if - end subroutine c_cuda_abgdxyz + subroutine c_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + class(psb_c_vect_cuda), intent(inout) :: w + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, nw + logical :: gpu_done + + info = psb_success_ + + gpu_done = .false. + if ((a==czero).or.(b==czero).or. & + & (c==czero).or.(d==czero).or.& + & (e==czero).or.(f==czero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + select type(xx => x) + class is (psb_c_vect_cuda) + select type(yy => y) + class is (psb_c_vect_cuda) + select type(zz => z) + class is (psb_c_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + class is (psb_d_vect_cuda) + select type(yy => y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + class is (psb_s_vect_cuda) + select type(yy => y) + class is (psb_s_vect_cuda) + select type(zz => z) + class is (psb_s_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + class is (psb_z_vect_cuda) + select type(yy => y) + class is (psb_z_vect_cuda) + select type(zz => z) + class is (psb_z_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nxpitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSxyzw(handle,n, a,b,c,d,e,f, + (float*)devVecX->v_,(float*) devVecY->v_, + (float*) devVecZ->v_,(float*) devVecW->v_); + return(i); +} + int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; diff --git a/cuda/svectordev.h b/cuda/svectordev.h index 730f929a..363c0108 100644 --- a/cuda/svectordev.h +++ b/cuda/svectordev.h @@ -69,6 +69,9 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB, float beta, void *deviceVecZ); diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index d1f23f2a..e9f0cec7 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -251,7 +251,29 @@ int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_); return(i); } - + +int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZxyzw(handle,n, a,b,c,d,e,f, + (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_, + (cuDoubleComplex *) devVecZ->v_,(cuDoubleComplex *) devVecW->v_); + return(i); +} + int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX, cuDoubleComplex beta, void* devMultiVecY) { int j=0, i=0; diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h index 4c32f11c..ae623bdb 100644 --- a/cuda/zvectordev.h +++ b/cuda/zvectordev.h @@ -80,6 +80,11 @@ int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, From 48455190ecd4e78a43be5a5d6f9c9749cce606a2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 5 Mar 2024 13:57:03 +0100 Subject: [PATCH 065/110] Add GPU version of XYZW --- cuda/spgpu/kernels/cxyzw.cu | 78 +++++++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/dxyzw.cu | 78 +++++++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/sxyzw.cu | 78 +++++++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/zxyzw.cu | 78 +++++++++++++++++++++++++++++++++++++ 4 files changed, 312 insertions(+) create mode 100644 cuda/spgpu/kernels/cxyzw.cu create mode 100644 cuda/spgpu/kernels/dxyzw.cu create mode 100644 cuda/spgpu/kernels/sxyzw.cu create mode 100644 cuda/spgpu/kernels/zxyzw.cu diff --git a/cuda/spgpu/kernels/cxyzw.cu b/cuda/spgpu/kernels/cxyzw.cu new file mode 100644 index 00000000..d2b332b1 --- /dev/null +++ b/cuda/spgpu/kernels/cxyzw.cu @@ -0,0 +1,78 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuCxyzw_krn(int n, cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + cuFloatComplex * x, cuFloatComplex *y, + cuFloatComplex *z, cuFloatComplex *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuFloatComplex ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = cuCfmaf(a, x[id], cuCmulf(b,y[id])); + tz = cuCfmaf(c, ty, cuCmulf(d,z[id])); + w[id] = cuCfmaf(e, tz, cuCmulf(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuCxyzw(spgpuHandle_t handle, + int n, + cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + __device cuFloatComplex * x, + __device cuFloatComplex * y, + __device cuFloatComplex * z, + __device cuFloatComplex *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/dxyzw.cu b/cuda/spgpu/kernels/dxyzw.cu new file mode 100644 index 00000000..afd36651 --- /dev/null +++ b/cuda/spgpu/kernels/dxyzw.cu @@ -0,0 +1,78 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuDxyzw_krn(int n, double a, double b, + double c, double d, + double e, double f, + double * x, double *y, + double *z, double *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + double ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = PREC_DADD(PREC_DADD(a, x[id]), PREC_DMUL(b,y[id])); + tz = PREC_DADD(PREC_DADD(c, ty), PREC_DMUL(d,z[id])); + w[id] = PREC_DADD(PREC_DADD(e, tz), PREC_DMUL(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuDxyzw(spgpuHandle_t handle, + int n, + double a, double b, + double c, double d, + double e, double f, + __device double * x, + __device double * y, + __device double * z, + __device double *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuDxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/sxyzw.cu b/cuda/spgpu/kernels/sxyzw.cu new file mode 100644 index 00000000..9cedd02f --- /dev/null +++ b/cuda/spgpu/kernels/sxyzw.cu @@ -0,0 +1,78 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSxyzw_krn(int n, float a, float b, + float c, float d, + float e, float f, + float * x, float *y, + float *z, float *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + float ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = PREC_FADD(PREC_FMUL(a, x[id]), PREC_FMUL(b,y[id])); + tz = PREC_FADD(PREC_FMUL(c, ty), PREC_FMUL(d,z[id])); + w[id] = PREC_FADD(PREC_FMUL(e, tz), PREC_FMUL(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuSxyzw(spgpuHandle_t handle, + int n, + float a, float b, + float c, float d, + float e, float f, + __device float * x, + __device float * y, + __device float * z, + __device float *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/zxyzw.cu b/cuda/spgpu/kernels/zxyzw.cu new file mode 100644 index 00000000..7a61edee --- /dev/null +++ b/cuda/spgpu/kernels/zxyzw.cu @@ -0,0 +1,78 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuZxyzw_krn(int n, cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + cuDoubleComplex * x, cuDoubleComplex *y, + cuDoubleComplex *z, cuDoubleComplex *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuDoubleComplex ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = cuCfma(a, x[id], cuCmul(b,y[id])); + tz = cuCfma(c, ty, cuCmul(d,z[id])); + w[id] = cuCfma(e, tz, cuCmul(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuZxyzw(spgpuHandle_t handle, + int n, + cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + __device cuDoubleComplex * x, + __device cuDoubleComplex * y, + __device cuDoubleComplex * z, + __device cuDoubleComplex *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + From e0a4d362fa5c89705bbe1a77c89e0cfab9826af9 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 14 Mar 2024 12:08:57 +0100 Subject: [PATCH 066/110] Define flag TRACK_CUDA_MALLOC --- cuda/cuda_util.c | 10 +++++++++- cuda/fcusparse_fct.h | 15 +++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index c0e5c6e5..3fe61cc0 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -37,11 +37,19 @@ static int hasUVA=-1; static struct cudaDeviceProp *prop=NULL; static spgpuHandle_t psb_cuda_handle = NULL; static cublasHandle_t psb_cublas_handle = NULL; - +#if defined(TRACK_CUDA_MALLOC) +static long long total_cuda_mem = 0; +#endif int allocRemoteBuffer(void** buffer, int count) { cudaError_t err = cudaMalloc(buffer, count); +#if defined(TRACK_CUDA_MALLOC) + total_cuda_mem += count; + fprintf(stderr,"Tracking CUDA allocRemoteBuffer for %ld bytes total %ld address %p\n", + count, total_cuda_mem, *buffer); +#endif + if (err == cudaSuccess) { return SPGPU_SUCCESS; diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 06facdc0..689bdc93 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -178,7 +178,8 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, CHECK_CUDA(cudaFree(cMat->mvbuffer)); cMat->mvbuffer = NULL; } - CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz); cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle, @@ -215,7 +216,9 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, CHECK_CUDA(cudaFree(cMat->mvbuffer)); cMat->mvbuffer = NULL; } - CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz); + cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -287,7 +290,9 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, CHECK_CUDA(cudaFree(cMat->svbuffer)); cMat->svbuffer = NULL; } - CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz); + cMat->svbsize=bfsz; CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -382,7 +387,9 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) /* cMat->svbuffer = NULL; */ /* } */ if (bfsz > 0) { - CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz); + } else { cMat->svbuffer=NULL; } From 472f16f0df839b7ee678b795477daf411e4bd663 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 19 Mar 2024 14:00:03 +0100 Subject: [PATCH 067/110] Fix compilation with --enable-serial --- base/modules/penv/psi_c_collective_mod.F90 | 6 +++--- base/modules/penv/psi_d_collective_mod.F90 | 6 +++--- base/modules/penv/psi_e_collective_mod.F90 | 6 +++--- base/modules/penv/psi_i2_collective_mod.F90 | 6 +++--- base/modules/penv/psi_m_collective_mod.F90 | 6 +++--- base/modules/penv/psi_s_collective_mod.F90 | 6 +++--- base/modules/penv/psi_z_collective_mod.F90 | 6 +++--- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/base/modules/penv/psi_c_collective_mod.F90 b/base/modules/penv/psi_c_collective_mod.F90 index 8da302d0..80a4b6a1 100644 --- a/base/modules/penv/psi_c_collective_mod.F90 +++ b/base/modules/penv/psi_c_collective_mod.F90 @@ -107,7 +107,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -175,7 +175,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -245,7 +245,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_d_collective_mod.F90 b/base/modules/penv/psi_d_collective_mod.F90 index 9639d650..67f95f55 100644 --- a/base/modules/penv/psi_d_collective_mod.F90 +++ b/base/modules/penv/psi_d_collective_mod.F90 @@ -747,7 +747,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -815,7 +815,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -885,7 +885,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_e_collective_mod.F90 b/base/modules/penv/psi_e_collective_mod.F90 index b9ab089b..5d66eed6 100644 --- a/base/modules/penv/psi_e_collective_mod.F90 +++ b/base/modules/penv/psi_e_collective_mod.F90 @@ -585,7 +585,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -653,7 +653,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -723,7 +723,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_i2_collective_mod.F90 b/base/modules/penv/psi_i2_collective_mod.F90 index 339e4281..88d40b66 100644 --- a/base/modules/penv/psi_i2_collective_mod.F90 +++ b/base/modules/penv/psi_i2_collective_mod.F90 @@ -585,7 +585,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -653,7 +653,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -723,7 +723,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_m_collective_mod.F90 b/base/modules/penv/psi_m_collective_mod.F90 index 8f45d398..c97ac5a3 100644 --- a/base/modules/penv/psi_m_collective_mod.F90 +++ b/base/modules/penv/psi_m_collective_mod.F90 @@ -585,7 +585,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -653,7 +653,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -723,7 +723,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_s_collective_mod.F90 b/base/modules/penv/psi_s_collective_mod.F90 index 6ffaae05..6dcc5253 100644 --- a/base/modules/penv/psi_s_collective_mod.F90 +++ b/base/modules/penv/psi_s_collective_mod.F90 @@ -747,7 +747,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -815,7 +815,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -885,7 +885,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_z_collective_mod.F90 b/base/modules/penv/psi_z_collective_mod.F90 index 8b3ec277..ff5e6a2d 100644 --- a/base/modules/penv/psi_z_collective_mod.F90 +++ b/base/modules/penv/psi_z_collective_mod.F90 @@ -107,7 +107,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -175,7 +175,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -245,7 +245,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) From 373d841bce7fab5672f43c48e49c995e42262dfa Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 22 Mar 2024 15:44:51 +0100 Subject: [PATCH 068/110] Don't need renaming of psi_gth and psi_sct --- base/modules/serial/psb_serial_mod.f90 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/base/modules/serial/psb_serial_mod.f90 b/base/modules/serial/psb_serial_mod.f90 index 627b318e..a25c1c37 100644 --- a/base/modules/serial/psb_serial_mod.f90 +++ b/base/modules/serial/psb_serial_mod.f90 @@ -36,9 +36,7 @@ module psb_serial_mod use psb_string_mod use psb_sort_mod - use psi_serial_mod, & - & psb_gth => psi_gth,& - & psb_sct => psi_sct + use psi_serial_mod use psb_s_serial_mod use psb_d_serial_mod From 3a25d7b04a7931e5fb4e793dfafbf3adeb011ff0 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 22 Mar 2024 17:10:30 +0100 Subject: [PATCH 069/110] Fixes for LLVM compilation --- base/comm/internals/psi_cswapdata_a.F90 | 4 ++-- base/comm/internals/psi_cswaptran_a.F90 | 4 ++-- base/comm/internals/psi_dswapdata_a.F90 | 4 ++-- base/comm/internals/psi_dswaptran_a.F90 | 4 ++-- base/comm/internals/psi_eswapdata_a.F90 | 4 ++-- base/comm/internals/psi_eswaptran_a.F90 | 4 ++-- base/comm/internals/psi_i2swapdata_a.F90 | 4 ++-- base/comm/internals/psi_i2swaptran_a.F90 | 4 ++-- base/comm/internals/psi_mswapdata_a.F90 | 4 ++-- base/comm/internals/psi_mswaptran_a.F90 | 4 ++-- base/comm/internals/psi_sswapdata_a.F90 | 4 ++-- base/comm/internals/psi_sswaptran_a.F90 | 4 ++-- base/comm/internals/psi_zswapdata_a.F90 | 4 ++-- base/comm/internals/psi_zswaptran_a.F90 | 4 ++-- cuda/psb_c_cuda_vect_mod.F90 | 2 +- ext/psb_c_ell_mat_mod.f90 | 8 ++++++++ ext/psb_d_ell_mat_mod.f90 | 8 ++++++++ ext/psb_s_ell_mat_mod.f90 | 8 ++++++++ ext/psb_z_ell_mat_mod.f90 | 8 ++++++++ 19 files changed, 61 insertions(+), 29 deletions(-) diff --git a/base/comm/internals/psi_cswapdata_a.F90 b/base/comm/internals/psi_cswapdata_a.F90 index 715b674e..153fa4d1 100644 --- a/base/comm/internals/psi_cswapdata_a.F90 +++ b/base/comm/internals/psi_cswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_cswaptran_a.F90 b/base/comm/internals/psi_cswaptran_a.F90 index a7f2c687..bfb192c1 100644 --- a/base/comm/internals/psi_cswaptran_a.F90 +++ b/base/comm/internals/psi_cswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90 index aff32517..4422597e 100644 --- a/base/comm/internals/psi_dswapdata_a.F90 +++ b/base/comm/internals/psi_dswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswaptran_a.F90 b/base/comm/internals/psi_dswaptran_a.F90 index ed13df40..a5b3fbfc 100644 --- a/base/comm/internals/psi_dswaptran_a.F90 +++ b/base/comm/internals/psi_dswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswapdata_a.F90 b/base/comm/internals/psi_eswapdata_a.F90 index 6a644563..74a5aea8 100644 --- a/base/comm/internals/psi_eswapdata_a.F90 +++ b/base/comm/internals/psi_eswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswaptran_a.F90 b/base/comm/internals/psi_eswaptran_a.F90 index 78ed7d8b..e10b574c 100644 --- a/base/comm/internals/psi_eswaptran_a.F90 +++ b/base/comm/internals/psi_eswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swapdata_a.F90 b/base/comm/internals/psi_i2swapdata_a.F90 index 42b4498e..75e0c870 100644 --- a/base/comm/internals/psi_i2swapdata_a.F90 +++ b/base/comm/internals/psi_i2swapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swaptran_a.F90 b/base/comm/internals/psi_i2swaptran_a.F90 index f94bf29e..0615e2ba 100644 --- a/base/comm/internals/psi_i2swaptran_a.F90 +++ b/base/comm/internals/psi_i2swaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswapdata_a.F90 b/base/comm/internals/psi_mswapdata_a.F90 index e71f3a52..01277d8e 100644 --- a/base/comm/internals/psi_mswapdata_a.F90 +++ b/base/comm/internals/psi_mswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswaptran_a.F90 b/base/comm/internals/psi_mswaptran_a.F90 index 3a780142..ee8c1d21 100644 --- a/base/comm/internals/psi_mswaptran_a.F90 +++ b/base/comm/internals/psi_mswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswapdata_a.F90 b/base/comm/internals/psi_sswapdata_a.F90 index 044dc141..47830330 100644 --- a/base/comm/internals/psi_sswapdata_a.F90 +++ b/base/comm/internals/psi_sswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswaptran_a.F90 b/base/comm/internals/psi_sswaptran_a.F90 index 434cec4c..ae54ee21 100644 --- a/base/comm/internals/psi_sswaptran_a.F90 +++ b/base/comm/internals/psi_sswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswapdata_a.F90 b/base/comm/internals/psi_zswapdata_a.F90 index 2d265c76..49617d2d 100644 --- a/base/comm/internals/psi_zswapdata_a.F90 +++ b/base/comm/internals/psi_zswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswaptran_a.F90 b/base/comm/internals/psi_zswaptran_a.F90 index 508d4045..586fa11b 100644 --- a/base/comm/internals/psi_zswaptran_a.F90 +++ b/base/comm/internals/psi_zswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 38480e34..2c2a4f61 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_c_cuda_vect_mod +module psb_c_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod diff --git a/ext/psb_c_ell_mat_mod.f90 b/ext/psb_c_ell_mat_mod.f90 index 8eaf01ba..6954946f 100644 --- a/ext/psb_c_ell_mat_mod.f90 +++ b/ext/psb_c_ell_mat_mod.f90 @@ -94,6 +94,7 @@ module psb_c_ell_mat_mod procedure, pass(a) :: print => psb_c_ell_print procedure, pass(a) :: free => c_ell_free procedure, pass(a) :: mold => psb_c_ell_mold + procedure, pass(a) :: get_nrm => c_ell_get_nrm end type psb_c_ell_sparse_mat @@ -459,6 +460,13 @@ contains res = 'ELL' end function c_ell_get_fmt + function c_ell_get_nrm(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function c_ell_get_nrm + function c_ell_get_nzeros(a) result(res) implicit none class(psb_c_ell_sparse_mat), intent(in) :: a diff --git a/ext/psb_d_ell_mat_mod.f90 b/ext/psb_d_ell_mat_mod.f90 index 3e34d63e..cc945baf 100644 --- a/ext/psb_d_ell_mat_mod.f90 +++ b/ext/psb_d_ell_mat_mod.f90 @@ -94,6 +94,7 @@ module psb_d_ell_mat_mod procedure, pass(a) :: print => psb_d_ell_print procedure, pass(a) :: free => d_ell_free procedure, pass(a) :: mold => psb_d_ell_mold + procedure, pass(a) :: get_nrm => d_ell_get_nrm end type psb_d_ell_sparse_mat @@ -459,6 +460,13 @@ contains res = 'ELL' end function d_ell_get_fmt + function d_ell_get_nrm(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function d_ell_get_nrm + function d_ell_get_nzeros(a) result(res) implicit none class(psb_d_ell_sparse_mat), intent(in) :: a diff --git a/ext/psb_s_ell_mat_mod.f90 b/ext/psb_s_ell_mat_mod.f90 index 5f09913a..9e3dd8b4 100644 --- a/ext/psb_s_ell_mat_mod.f90 +++ b/ext/psb_s_ell_mat_mod.f90 @@ -94,6 +94,7 @@ module psb_s_ell_mat_mod procedure, pass(a) :: print => psb_s_ell_print procedure, pass(a) :: free => s_ell_free procedure, pass(a) :: mold => psb_s_ell_mold + procedure, pass(a) :: get_nrm => s_ell_get_nrm end type psb_s_ell_sparse_mat @@ -459,6 +460,13 @@ contains res = 'ELL' end function s_ell_get_fmt + function s_ell_get_nrm(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function s_ell_get_nrm + function s_ell_get_nzeros(a) result(res) implicit none class(psb_s_ell_sparse_mat), intent(in) :: a diff --git a/ext/psb_z_ell_mat_mod.f90 b/ext/psb_z_ell_mat_mod.f90 index 52dc62b1..0e1f0e00 100644 --- a/ext/psb_z_ell_mat_mod.f90 +++ b/ext/psb_z_ell_mat_mod.f90 @@ -94,6 +94,7 @@ module psb_z_ell_mat_mod procedure, pass(a) :: print => psb_z_ell_print procedure, pass(a) :: free => z_ell_free procedure, pass(a) :: mold => psb_z_ell_mold + procedure, pass(a) :: get_nrm => z_ell_get_nrm end type psb_z_ell_sparse_mat @@ -459,6 +460,13 @@ contains res = 'ELL' end function z_ell_get_fmt + function z_ell_get_nrm(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function z_ell_get_nrm + function z_ell_get_nzeros(a) result(res) implicit none class(psb_z_ell_sparse_mat), intent(in) :: a From 0023b8ac7803dc9c05a2de87e2b9af2479e1bc6c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 25 Mar 2024 15:41:47 +0100 Subject: [PATCH 070/110] Compile adjcncy_fnd_owner --- base/internals/psi_adjcncy_fnd_owner.F90 | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index 639cdb5d..83e18623 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -87,7 +87,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) integer(psb_lpk_) :: mglob, ih type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me - logical, parameter :: gettime=.true., debug=.false. + logical, parameter :: debug=.false. integer(psb_mpk_) :: xchg_alg logical, parameter :: do_timings=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 @@ -132,10 +132,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) goto 9999 end if - if (gettime) then - t0 = psb_wtime() - end if - nadj = size(adj) nidx = size(idx) call psb_realloc(nidx,iprc,info) From 59e6df73a4f7b65730d3ee774e99732812df960b Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 1 Apr 2024 15:57:44 +0200 Subject: [PATCH 071/110] Make sure configure recognizes FLANG --- configure | 21 +++++++++++++-------- configure.ac | 15 ++++++++++----- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/configure b/configure index 3a774ee1..0421872d 100755 --- a/configure +++ b/configure @@ -3452,7 +3452,7 @@ ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu if test -n "$ac_tool_prefix"; then - for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran + for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang-new ifort ifc nagfor gfortran do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -3501,7 +3501,7 @@ fi fi if test -z "$FC"; then ac_ct_FC=$FC - for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran + for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang-new ifort ifc nagfor gfortran do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -3928,7 +3928,7 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then - for ac_prog in xlc pgcc icc gcc cc + for ac_prog in xlc pgcc clang icc gcc cc do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -3977,7 +3977,7 @@ fi fi if test -z "$CC"; then ac_ct_CC=$CC - for ac_prog in xlc pgcc icc gcc cc + for ac_prog in xlc pgcc clang icc gcc cc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -4424,7 +4424,7 @@ if test -z "$CXX"; then CXX=$CCC else if test -n "$ac_tool_prefix"; then - for ac_prog in CC xlc++ icpc g++ + for ac_prog in CC xlc++ clang++ icpc g++ do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -4473,7 +4473,7 @@ fi fi if test -z "$CXX"; then ac_ct_CXX=$CXX - for ac_prog in CC xlc++ icpc g++ + for ac_prog in CC xlc++ clang++ icpc g++ do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -6874,10 +6874,12 @@ if test x"$psblas_cv_fc" == "x" ; then # Sun compiler detection psblas_cv_fc="sun" - elif eval "$MPIFC -V 2>&1 | grep Portland 2>/dev/null" ; then + elif eval "$MPIFC --version 2>&1 | grep flang-new 2>/dev/null" ; then # Portland group compiler detection - psblas_cv_fc="pg" + psblas_cv_fc="flang" + psblas_cv_define_prepend="" + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" elif eval "$MPIFC -V 2>&1 | grep Intel.*Fortran.*Compiler 2>/dev/null" ; then # Intel compiler identification @@ -7301,6 +7303,9 @@ if test "X$FCOPT" == "X" ; then elif test "X$psblas_cv_fc" == X"pg" ; then # other compilers .. FCOPT="-fast $FCOPT" + elif test "X$psblas_cv_fc" == X"flang" ; then + # other compilers .. + FCOPT="-O3" # NOTE : PG & Sun use -fast instead -O3 elif test "X$psblas_cv_fc" == X"sun" ; then # other compilers .. diff --git a/configure.ac b/configure.ac index 62aad4f8..c5cff358 100755 --- a/configure.ac +++ b/configure.ac @@ -100,17 +100,17 @@ dnl We set our own FC flags, ignore those from AC_PROG_FC but not those from the dnl environment variable. Same for C dnl save_FCFLAGS="$FCFLAGS"; -AC_PROG_FC([ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran]) +AC_PROG_FC([ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang-new ifort ifc nagfor gfortran]) FCFLAGS="$save_FCFLAGS"; save_CFLAGS="$CFLAGS"; -AC_PROG_CC([xlc pgcc icc gcc cc ]) +AC_PROG_CC([xlc pgcc clang icc gcc cc ]) if test "x$ac_cv_prog_cc_stdc" == "xno" ; then AC_MSG_ERROR([Problem : Need a C99 compiler ! ]) else C99OPT="$ac_cv_prog_cc_stdc"; fi CFLAGS="$save_CFLAGS"; -AC_PROG_CXX([CC xlc++ icpc g++]) +AC_PROG_CXX([CC xlc++ clang++ icpc g++]) dnl AC_PROG_F90 doesn't exist, at the time of writing this ! dnl AC_PROG_F90 @@ -248,10 +248,12 @@ if test x"$psblas_cv_fc" == "x" ; then # Sun compiler detection psblas_cv_fc="sun" - elif eval "$MPIFC -V 2>&1 | grep Portland 2>/dev/null" ; then + elif eval "$MPIFC --version 2>&1 | grep flang-new 2>/dev/null" ; then # Portland group compiler detection - psblas_cv_fc="pg" + psblas_cv_fc="flang" + psblas_cv_define_prepend="" + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" elif eval "$MPIFC -V 2>&1 | grep Intel.*Fortran.*Compiler 2>/dev/null" ; then # Intel compiler identification @@ -427,6 +429,9 @@ if test "X$FCOPT" == "X" ; then elif test "X$psblas_cv_fc" == X"pg" ; then # other compilers .. FCOPT="-fast $FCOPT" + elif test "X$psblas_cv_fc" == X"flang" ; then + # other compilers .. + FCOPT="-O3" # NOTE : PG & Sun use -fast instead -O3 elif test "X$psblas_cv_fc" == X"sun" ; then # other compilers .. From a2f92e616f5c25467b7b36a2bf7ff7a7b1db472e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 1 Apr 2024 15:58:32 +0200 Subject: [PATCH 072/110] Put VOLATILE under ifdef for FLANG --- base/comm/internals/psi_cswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_cswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_dswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_dswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_eswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_eswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_i2swapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_i2swaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_mswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_mswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_sswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_sswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_zswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_zswaptran_a.F90 | 8 ++++++-- 14 files changed, 84 insertions(+), 28 deletions(-) diff --git a/base/comm/internals/psi_cswapdata_a.F90 b/base/comm/internals/psi_cswapdata_a.F90 index 153fa4d1..acf7f342 100644 --- a/base/comm/internals/psi_cswapdata_a.F90 +++ b/base/comm/internals/psi_cswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_cswaptran_a.F90 b/base/comm/internals/psi_cswaptran_a.F90 index bfb192c1..0aa5d6f6 100644 --- a/base/comm/internals/psi_cswaptran_a.F90 +++ b/base/comm/internals/psi_cswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90 index 4422597e..ff562994 100644 --- a/base/comm/internals/psi_dswapdata_a.F90 +++ b/base/comm/internals/psi_dswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswaptran_a.F90 b/base/comm/internals/psi_dswaptran_a.F90 index a5b3fbfc..a0506a2b 100644 --- a/base/comm/internals/psi_dswaptran_a.F90 +++ b/base/comm/internals/psi_dswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswapdata_a.F90 b/base/comm/internals/psi_eswapdata_a.F90 index 74a5aea8..acc6a6bf 100644 --- a/base/comm/internals/psi_eswapdata_a.F90 +++ b/base/comm/internals/psi_eswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswaptran_a.F90 b/base/comm/internals/psi_eswaptran_a.F90 index e10b574c..ce12a7e7 100644 --- a/base/comm/internals/psi_eswaptran_a.F90 +++ b/base/comm/internals/psi_eswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swapdata_a.F90 b/base/comm/internals/psi_i2swapdata_a.F90 index 75e0c870..e2a8ad4e 100644 --- a/base/comm/internals/psi_i2swapdata_a.F90 +++ b/base/comm/internals/psi_i2swapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swaptran_a.F90 b/base/comm/internals/psi_i2swaptran_a.F90 index 0615e2ba..36652565 100644 --- a/base/comm/internals/psi_i2swaptran_a.F90 +++ b/base/comm/internals/psi_i2swaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswapdata_a.F90 b/base/comm/internals/psi_mswapdata_a.F90 index 01277d8e..c7f1965b 100644 --- a/base/comm/internals/psi_mswapdata_a.F90 +++ b/base/comm/internals/psi_mswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswaptran_a.F90 b/base/comm/internals/psi_mswaptran_a.F90 index ee8c1d21..1e9fcede 100644 --- a/base/comm/internals/psi_mswaptran_a.F90 +++ b/base/comm/internals/psi_mswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswapdata_a.F90 b/base/comm/internals/psi_sswapdata_a.F90 index 47830330..c802578c 100644 --- a/base/comm/internals/psi_sswapdata_a.F90 +++ b/base/comm/internals/psi_sswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswaptran_a.F90 b/base/comm/internals/psi_sswaptran_a.F90 index ae54ee21..c4290be1 100644 --- a/base/comm/internals/psi_sswaptran_a.F90 +++ b/base/comm/internals/psi_sswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswapdata_a.F90 b/base/comm/internals/psi_zswapdata_a.F90 index 49617d2d..ec21072c 100644 --- a/base/comm/internals/psi_zswapdata_a.F90 +++ b/base/comm/internals/psi_zswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswaptran_a.F90 b/base/comm/internals/psi_zswaptran_a.F90 index 586fa11b..c1062839 100644 --- a/base/comm/internals/psi_zswaptran_a.F90 +++ b/base/comm/internals/psi_zswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ From 4347c663c298088cb128b7c155536d3b5d1c09af Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 4 Apr 2024 10:52:41 +0200 Subject: [PATCH 073/110] Change conftest **argv to recognize CUDA_VERSION. --- config/pac.m4 | 2 +- configure | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/pac.m4 b/config/pac.m4 index 52185439..0d22392f 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2251,7 +2251,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then #include #include -int main(int argc, char *argv[]) +int main(int argc, char **argv) { printf("%d",CUDA_VERSION); return(0); diff --git a/configure b/configure index 0421872d..3d8c5cd6 100755 --- a/configure +++ b/configure @@ -10786,7 +10786,7 @@ printf %s "checking for CUDA version... " >&6; } #include #include -int main(int argc, char *argv) +int main(int argc, char **argv) { printf("%d",CUDA_VERSION); return(0); From 0760e4d553e84cc495a3d6707f5bd206c41238fa Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 4 Apr 2024 10:53:20 +0200 Subject: [PATCH 074/110] Fix C function declarations for compilation with LLVM/clang in CUDA --- cuda/cuda_util.c | 14 ++++++++++++++ cuda/cuda_util.h | 1 + cuda/elldev.c | 2 +- cuda/elldev.h | 1 + cuda/fcusparse_fct.h | 2 +- 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 3fe61cc0..b7d0199e 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -378,22 +378,36 @@ void cudaSync() { cudaError_t err; err = cudaDeviceSynchronize(); +#if 0 if (err == cudaSuccess) return SPGPU_SUCCESS; else { fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err)); return SPGPU_UNSPECIFIED; } +#else + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err)); + } + return ; +#endif } void cudaReset() { cudaError_t err; err = cudaDeviceReset(); +#if 0 if (err != cudaSuccess) { fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); return SPGPU_UNSPECIFIED; } +#else + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); + } + return ; +#endif } diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h index 95c8d1dc..4eafb5bf 100644 --- a/cuda/cuda_util.h +++ b/cuda/cuda_util.h @@ -54,6 +54,7 @@ int freeRemoteBuffer(void* buffer); int gpuInit(int dev); int getDeviceCount(); int getDevice(); +int getDeviceHasUVA(); int setDevice(int dev); int getGPUMultiProcessors(); int getGPUMemoryBusWidth(); diff --git a/cuda/elldev.c b/cuda/elldev.c index 3b79a863..a5d893c1 100644 --- a/cuda/elldev.c +++ b/cuda/elldev.c @@ -112,7 +112,7 @@ void zeroEllDevice(void *remoteMatrix) else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE) cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex)); else - return SPGPU_UNSUPPORTED; // Unsupported params + return ; // Unsupported params //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); diff --git a/cuda/elldev.h b/cuda/elldev.h index c1001439..5305057a 100644 --- a/cuda/elldev.h +++ b/cuda/elldev.h @@ -95,6 +95,7 @@ typedef struct EllDeviceParams unsigned int firstIndex; } EllDeviceParams; +int computeEllAllocPitch(int rowsCount); int FallocEllDevice(void** deviceMat, unsigned int rows, unsigned int maxRowSize, unsigned int nnzeros, unsigned int columns, unsigned int elementType, diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 689bdc93..28d900da 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -320,7 +320,7 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, } #if CUDA_VERSION >= 11030 -T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) { int64_t tr,tc,tz; tr = cMat->m; From 2f575894fcf0f31de8df480c2549ff8184abb196 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 15 Apr 2024 11:18:51 +0200 Subject: [PATCH 075/110] Fix --with-cudacc in configure --- configure | 4 +- configure.ac | 2 +- prec/impl/psb_cilu_fct.f90 | 438 ------------------------------------ prec/impl/psb_dilu_fct.f90 | 441 ------------------------------------- prec/impl/psb_silu_fct.f90 | 440 ------------------------------------ prec/impl/psb_zilu_fct.f90 | 438 ------------------------------------ 6 files changed, 3 insertions(+), 1760 deletions(-) delete mode 100644 prec/impl/psb_cilu_fct.f90 delete mode 100644 prec/impl/psb_dilu_fct.f90 delete mode 100644 prec/impl/psb_silu_fct.f90 delete mode 100644 prec/impl/psb_zilu_fct.f90 diff --git a/configure b/configure index 3d8c5cd6..b7e5f329 100755 --- a/configure +++ b/configure @@ -12296,7 +12296,7 @@ fi CCOPT : ${CCOPT} CUDA : ${HAVE_CUDA} - CUDA_CC : ${CUDA_CC} + CUDA_CC : ${pac_cv_cudacc} BLAS : ${BLAS_LIBS} @@ -12329,7 +12329,7 @@ printf "%s\n" "$as_me: CCOPT : ${CCOPT} CUDA : ${HAVE_CUDA} - CUDA_CC : ${CUDA_CC} + CUDA_CC : ${pac_cv_cudacc} BLAS : ${BLAS_LIBS} diff --git a/configure.ac b/configure.ac index c5cff358..10f9e2f4 100755 --- a/configure.ac +++ b/configure.ac @@ -969,7 +969,7 @@ AC_MSG_NOTICE([ CCOPT : ${CCOPT} CUDA : ${HAVE_CUDA} - CUDA_CC : ${CUDA_CC} + CUDA_CC : ${pac_cv_cudacc} BLAS : ${BLAS_LIBS} diff --git a/prec/impl/psb_cilu_fct.f90 b/prec/impl/psb_cilu_fct.f90 deleted file mode 100644 index d54769bf..00000000 --- a/prec/impl/psb_cilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_cilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_cspmat_type),intent(in) :: a - type(psb_c_csr_sparse_mat),intent(inout) :: l,u - type(psb_cspmat_type),intent(in), optional, target :: blck - complex(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_cspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_cilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_cilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_cilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_cspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_c_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_cilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = czero - - ! - ! - select type(aa => a%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = czero - - select type(aa => b%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_cilu_fctint -end subroutine psb_cilu_fct diff --git a/prec/impl/psb_dilu_fct.f90 b/prec/impl/psb_dilu_fct.f90 deleted file mode 100644 index b97b88ec..00000000 --- a/prec/impl/psb_dilu_fct.f90 +++ /dev/null @@ -1,441 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_dilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_dspmat_type),intent(in) :: a - type(psb_d_csr_sparse_mat),intent(inout) :: l,u - type(psb_dspmat_type),intent(in), optional, target :: blck - real(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_dspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_dilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_dilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_dilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_dspmat_type), target :: a - type(psb_dspmat_type), target :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_d_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - - name='psb_dilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = dzero - ! - ! - select type(aa => a%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = dzero - - select type(aa => b%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_dilu_fctint -end subroutine psb_dilu_fct diff --git a/prec/impl/psb_silu_fct.f90 b/prec/impl/psb_silu_fct.f90 deleted file mode 100644 index 85b58bad..00000000 --- a/prec/impl/psb_silu_fct.f90 +++ /dev/null @@ -1,440 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_silu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_sspmat_type),intent(in) :: a - type(psb_s_csr_sparse_mat),intent(inout) :: l,u - type(psb_sspmat_type),intent(in), optional, target :: blck - real(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_sspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_silu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_silu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_silu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_sspmat_type) :: a - type(psb_sspmat_type) :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_s_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_silu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = szero - ! - ! - select type(aa => a%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = szero - - select type(aa => b%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_silu_fctint -end subroutine psb_silu_fct diff --git a/prec/impl/psb_zilu_fct.f90 b/prec/impl/psb_zilu_fct.f90 deleted file mode 100644 index e5ea4b0d..00000000 --- a/prec/impl/psb_zilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_zilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_zspmat_type),intent(in) :: a - type(psb_z_csr_sparse_mat),intent(inout) :: l,u - type(psb_zspmat_type),intent(in), optional, target :: blck - complex(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_zspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_zilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_zilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_zilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_zspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_z_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_zilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = zzero - - ! - ! - select type(aa => a%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = zzero - - select type(aa => b%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_zilu_fctint -end subroutine psb_zilu_fct From 553531eefbffbb7f4f0092055350b88aa3288ee4 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 15 Apr 2024 12:16:43 +0200 Subject: [PATCH 076/110] Take out obsolete ilu_fct source files --- prec/impl/Makefile | 8 +- prec/impl/psb_cilu_fct.f90 | 438 ------------------------------------ prec/impl/psb_dilu_fct.f90 | 441 ------------------------------------- prec/impl/psb_silu_fct.f90 | 440 ------------------------------------ prec/impl/psb_zilu_fct.f90 | 438 ------------------------------------ 5 files changed, 4 insertions(+), 1761 deletions(-) delete mode 100644 prec/impl/psb_cilu_fct.f90 delete mode 100644 prec/impl/psb_dilu_fct.f90 delete mode 100644 prec/impl/psb_silu_fct.f90 delete mode 100644 prec/impl/psb_zilu_fct.f90 diff --git a/prec/impl/Makefile b/prec/impl/Makefile index bc5ef2e1..2b6b1dc5 100644 --- a/prec/impl/Makefile +++ b/prec/impl/Makefile @@ -7,16 +7,16 @@ HERE=.. OBJS=psb_s_prec_type_impl.o psb_d_prec_type_impl.o \ psb_c_prec_type_impl.o psb_z_prec_type_impl.o \ psb_d_diagprec_impl.o psb_d_bjacprec_impl.o psb_d_nullprec_impl.o \ - psb_dilu_fct.o psb_d_ilu0_fact.o psb_d_iluk_fact.o psb_d_ilut_fact.o \ + psb_d_ilu0_fact.o psb_d_iluk_fact.o psb_d_ilut_fact.o \ psb_dprecbld.o psb_dprecinit.o \ psb_s_diagprec_impl.o psb_s_bjacprec_impl.o psb_s_nullprec_impl.o \ - psb_silu_fct.o psb_s_ilu0_fact.o psb_s_iluk_fact.o psb_s_ilut_fact.o \ + psb_s_ilu0_fact.o psb_s_iluk_fact.o psb_s_ilut_fact.o \ psb_sprecbld.o psb_sprecinit.o \ psb_c_diagprec_impl.o psb_c_bjacprec_impl.o psb_c_nullprec_impl.o \ - psb_cilu_fct.o psb_c_ilu0_fact.o psb_c_iluk_fact.o psb_c_ilut_fact.o \ + psb_c_ilu0_fact.o psb_c_iluk_fact.o psb_c_ilut_fact.o \ psb_cprecbld.o psb_cprecinit.o \ psb_z_diagprec_impl.o psb_z_bjacprec_impl.o psb_z_nullprec_impl.o \ - psb_zilu_fct.o psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \ + psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \ psb_zprecbld.o psb_zprecinit.o \ psb_c_sparsify.o psb_d_sparsify.o psb_s_sparsify.o psb_z_sparsify.o \ psb_crwclip.o psb_drwclip.o psb_srwclip.o psb_zrwclip.o \ diff --git a/prec/impl/psb_cilu_fct.f90 b/prec/impl/psb_cilu_fct.f90 deleted file mode 100644 index d54769bf..00000000 --- a/prec/impl/psb_cilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_cilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_cspmat_type),intent(in) :: a - type(psb_c_csr_sparse_mat),intent(inout) :: l,u - type(psb_cspmat_type),intent(in), optional, target :: blck - complex(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_cspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_cilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_cilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_cilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_cspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_c_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_cilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = czero - - ! - ! - select type(aa => a%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = czero - - select type(aa => b%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_cilu_fctint -end subroutine psb_cilu_fct diff --git a/prec/impl/psb_dilu_fct.f90 b/prec/impl/psb_dilu_fct.f90 deleted file mode 100644 index b97b88ec..00000000 --- a/prec/impl/psb_dilu_fct.f90 +++ /dev/null @@ -1,441 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_dilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_dspmat_type),intent(in) :: a - type(psb_d_csr_sparse_mat),intent(inout) :: l,u - type(psb_dspmat_type),intent(in), optional, target :: blck - real(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_dspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_dilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_dilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_dilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_dspmat_type), target :: a - type(psb_dspmat_type), target :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_d_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - - name='psb_dilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = dzero - ! - ! - select type(aa => a%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = dzero - - select type(aa => b%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_dilu_fctint -end subroutine psb_dilu_fct diff --git a/prec/impl/psb_silu_fct.f90 b/prec/impl/psb_silu_fct.f90 deleted file mode 100644 index 85b58bad..00000000 --- a/prec/impl/psb_silu_fct.f90 +++ /dev/null @@ -1,440 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_silu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_sspmat_type),intent(in) :: a - type(psb_s_csr_sparse_mat),intent(inout) :: l,u - type(psb_sspmat_type),intent(in), optional, target :: blck - real(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_sspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_silu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_silu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_silu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_sspmat_type) :: a - type(psb_sspmat_type) :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_s_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_silu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = szero - ! - ! - select type(aa => a%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = szero - - select type(aa => b%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_silu_fctint -end subroutine psb_silu_fct diff --git a/prec/impl/psb_zilu_fct.f90 b/prec/impl/psb_zilu_fct.f90 deleted file mode 100644 index e5ea4b0d..00000000 --- a/prec/impl/psb_zilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_zilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_zspmat_type),intent(in) :: a - type(psb_z_csr_sparse_mat),intent(inout) :: l,u - type(psb_zspmat_type),intent(in), optional, target :: blck - complex(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_zspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_zilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_zilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_zilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_zspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_z_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_zilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = zzero - - ! - ! - select type(aa => a%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = zzero - - select type(aa => b%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_zilu_fctint -end subroutine psb_zilu_fct From e18de650f2379f6cb35d0eac81145a0e03e1340c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Apr 2024 13:53:04 +0200 Subject: [PATCH 077/110] Take out debug print --- cuda/fcusparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c index 1b37272c..e8e46c63 100644 --- a/cuda/fcusparse.c +++ b/cuda/fcusparse.c @@ -53,7 +53,7 @@ int FcusparseCreate() if (ret == CUSPARSE_STATUS_SUCCESS) cusparse_handle = handle; } - fprintf(stderr,"Created cusparses_handle\n"); + //fprintf(stderr,"Created cusparses_handle\n"); return (ret); } From aca1848401829ccfc8de8c8834341649589d6b17 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Apr 2024 13:53:23 +0200 Subject: [PATCH 078/110] New timings in CG --- base/internals/psi_desc_index.F90 | 2 +- base/tools/psb_icdasb.F90 | 2 +- krylov/psb_dcg.F90 | 31 +++++++++++++++++++++++++++---- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/base/internals/psi_desc_index.F90 b/base/internals/psi_desc_index.F90 index 35c8d921..6423c116 100644 --- a/base/internals/psi_desc_index.F90 +++ b/base/internals/psi_desc_index.F90 @@ -137,7 +137,7 @@ subroutine psi_i_desc_index(desc,index_in,dep_list,& & idxr, idxs, iszs, iszr, nesd, nerv, ixp, idx integer(psb_mpk_) :: icomm, minfo - logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false. + logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1, idx_phase4=-1 logical, parameter :: usempi=.false. integer(psb_ipk_) :: debug_level, debug_unit diff --git a/base/tools/psb_icdasb.F90 b/base/tools/psb_icdasb.F90 index 31d92133..04f95738 100644 --- a/base/tools/psb_icdasb.F90 +++ b/base/tools/psb_icdasb.F90 @@ -67,7 +67,7 @@ subroutine psb_icdasb(desc,info,ext_hv,mold) integer(psb_mpk_) :: icomm integer(psb_ipk_) :: np,me logical :: ext_hv_ - logical, parameter :: do_timings=.true. + logical, parameter :: do_timings=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 integer(psb_ipk_), save :: idx_phase11=-1, idx_phase12=-1, idx_phase13=-1 integer(psb_ipk_), save :: idx_total=-1 diff --git a/krylov/psb_dcg.F90 b/krylov/psb_dcg.F90 index 669573be..caebb712 100644 --- a/krylov/psb_dcg.F90 +++ b/krylov/psb_dcg.F90 @@ -129,6 +129,8 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& logical :: do_cond character(len=20) :: name character(len=*), parameter :: methdname='CG' + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: cg_vect=-1, cg_mv=-1, cg_prec=-1 info = psb_success_ name = 'psb_dcg' @@ -149,6 +151,12 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_errpush(info,name) goto 9999 endif + if ((do_timings).and.(cg_vect==-1)) & + & cg_vect = psb_get_timer_idx("CG: vector ops ") + if ((do_timings).and.(cg_mv==-1)) & + & cg_mv = psb_get_timer_idx("CG: MV product") + if ((do_timings).and.(cg_prec==-1)) & + & cg_prec = psb_get_timer_idx("CG: preconditioner") mglob = desc_a%get_global_rows() @@ -219,17 +227,21 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& ! = ! = r0 = b-Ax0 ! = + if (do_timings) call psb_tic(cg_vect) if (itx>= itmax_) exit restart - it = 0 call psb_geaxpby(done,b,dzero,r,desc_a,info) + if (do_timings) call psb_toc(cg_vect) + if (do_timings) call psb_tic(cg_mv) if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) goto 9999 end if - + if (do_timings) call psb_toc(cg_mv) + + if (do_timings) call psb_tic(cg_vect) rho = dzero call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info) @@ -237,13 +249,18 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_errpush(psb_err_from_subroutine_non_,name) goto 9999 End If - + if (do_timings) call psb_toc(cg_vect) + iteration: do it = it + 1 itx = itx + 1 - + if (do_timings) call psb_tic(cg_prec) + call prec%apply(r,z,desc_a,info,work=aux) + if (do_timings) call psb_toc(cg_prec) + if (do_timings) call psb_tic(cg_vect) + rho_old = rho rho = psb_gedot(r,z,desc_a,info) @@ -254,13 +271,18 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& if (debug_level >= psb_debug_ext_)& & write(debug_unit,*) me,' ',trim(name),& & ': CG Iteration breakdown rho' + if (do_timings) call psb_toc(cg_vect) exit iteration endif beta = rho/rho_old call psb_geaxpby(done,z,beta,p,desc_a,info) end if + if (do_timings) call psb_toc(cg_vect) + if (do_timings) call psb_tic(cg_mv) call psb_spmm(done,a,p,dzero,q,desc_a,info,work=aux) + if (do_timings) call psb_toc(cg_mv) + if (do_timings) call psb_tic(cg_vect) sigma = psb_gedot(p,q,desc_a,info) if (sigma == dzero) then if (debug_level >= psb_debug_ext_)& @@ -293,6 +315,7 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& end do iteration end do restart + if (do_timings) call psb_toc(cg_vect) if (do_cond) then if (me == psb_root_) then #if defined(HAVE_LAPACK) From ba8c32c507160763fc81406ddc16c5c67f07a4ca Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Apr 2024 14:27:35 +0200 Subject: [PATCH 079/110] Define merge_nd method --- base/modules/serial/psb_c_mat_mod.F90 | 13 +++++-- base/modules/serial/psb_d_mat_mod.F90 | 13 +++++-- base/modules/serial/psb_s_mat_mod.F90 | 13 +++++-- base/modules/serial/psb_z_mat_mod.F90 | 13 +++++-- base/serial/impl/psb_c_mat_impl.F90 | 50 +++++++++++++++++++++++++++ base/serial/impl/psb_d_mat_impl.F90 | 50 +++++++++++++++++++++++++++ base/serial/impl/psb_s_mat_impl.F90 | 50 +++++++++++++++++++++++++++ base/serial/impl/psb_z_mat_impl.F90 | 50 +++++++++++++++++++++++++++ 8 files changed, 240 insertions(+), 12 deletions(-) diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index ee819535..e7c84b00 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -205,6 +205,7 @@ module psb_c_mat_mod procedure, pass(a) :: cscnv_base => psb_c_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base procedure, pass(a) :: split_nd => psb_c_split_nd + procedure, pass(a) :: merge_nd => psb_c_merge_nd procedure, pass(a) :: clone => psb_cspmat_clone procedure, pass(a) :: move_alloc => psb_cspmat_type_move ! @@ -849,11 +850,17 @@ module psb_c_mat_mod class(psb_cspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: n_rows, n_cols integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_),optional, intent(in) :: dupl -!!$ character(len=*), optional, intent(in) :: type -!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold end subroutine psb_c_split_nd end interface + + interface + subroutine psb_c_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_merge_nd + end interface ! ! CSCNV: switches to a different internal derived type. diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 82d2e822..fe09d83a 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -205,6 +205,7 @@ module psb_d_mat_mod procedure, pass(a) :: cscnv_base => psb_d_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base procedure, pass(a) :: split_nd => psb_d_split_nd + procedure, pass(a) :: merge_nd => psb_d_merge_nd procedure, pass(a) :: clone => psb_dspmat_clone procedure, pass(a) :: move_alloc => psb_dspmat_type_move ! @@ -849,11 +850,17 @@ module psb_d_mat_mod class(psb_dspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: n_rows, n_cols integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_),optional, intent(in) :: dupl -!!$ character(len=*), optional, intent(in) :: type -!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold end subroutine psb_d_split_nd end interface + + interface + subroutine psb_d_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_merge_nd + end interface ! ! CSCNV: switches to a different internal derived type. diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index d8a2e6ae..868583b2 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -205,6 +205,7 @@ module psb_s_mat_mod procedure, pass(a) :: cscnv_base => psb_s_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base procedure, pass(a) :: split_nd => psb_s_split_nd + procedure, pass(a) :: merge_nd => psb_s_merge_nd procedure, pass(a) :: clone => psb_sspmat_clone procedure, pass(a) :: move_alloc => psb_sspmat_type_move ! @@ -849,11 +850,17 @@ module psb_s_mat_mod class(psb_sspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: n_rows, n_cols integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_),optional, intent(in) :: dupl -!!$ character(len=*), optional, intent(in) :: type -!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold end subroutine psb_s_split_nd end interface + + interface + subroutine psb_s_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_merge_nd + end interface ! ! CSCNV: switches to a different internal derived type. diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index 694d4efc..48b670de 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -205,6 +205,7 @@ module psb_z_mat_mod procedure, pass(a) :: cscnv_base => psb_z_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base procedure, pass(a) :: split_nd => psb_z_split_nd + procedure, pass(a) :: merge_nd => psb_z_merge_nd procedure, pass(a) :: clone => psb_zspmat_clone procedure, pass(a) :: move_alloc => psb_zspmat_type_move ! @@ -849,11 +850,17 @@ module psb_z_mat_mod class(psb_zspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: n_rows, n_cols integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_),optional, intent(in) :: dupl -!!$ character(len=*), optional, intent(in) :: type -!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold end subroutine psb_z_split_nd end interface + + interface + subroutine psb_z_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_merge_nd + end interface ! ! CSCNV: switches to a different internal derived type. diff --git a/base/serial/impl/psb_c_mat_impl.F90 b/base/serial/impl/psb_c_mat_impl.F90 index bbac0406..532cb9b8 100644 --- a/base/serial/impl/psb_c_mat_impl.F90 +++ b/base/serial/impl/psb_c_mat_impl.F90 @@ -1263,6 +1263,56 @@ subroutine psb_c_split_nd(a,n_rows,n_cols,info) end subroutine psb_c_split_nd +subroutine psb_c_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_c_mat_mod, psb_protect_name => psb_c_merge_nd + implicit none + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + type(psb_c_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_merge_nd + subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index 9af64b3f..e48654ce 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -1263,6 +1263,56 @@ subroutine psb_d_split_nd(a,n_rows,n_cols,info) end subroutine psb_d_split_nd +subroutine psb_d_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_d_mat_mod, psb_protect_name => psb_d_merge_nd + implicit none + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + type(psb_d_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_merge_nd + subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod diff --git a/base/serial/impl/psb_s_mat_impl.F90 b/base/serial/impl/psb_s_mat_impl.F90 index c0370774..77ac81e6 100644 --- a/base/serial/impl/psb_s_mat_impl.F90 +++ b/base/serial/impl/psb_s_mat_impl.F90 @@ -1263,6 +1263,56 @@ subroutine psb_s_split_nd(a,n_rows,n_cols,info) end subroutine psb_s_split_nd +subroutine psb_s_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_s_mat_mod, psb_protect_name => psb_s_merge_nd + implicit none + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + type(psb_s_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_merge_nd + subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod diff --git a/base/serial/impl/psb_z_mat_impl.F90 b/base/serial/impl/psb_z_mat_impl.F90 index 20815cb0..18d6c03d 100644 --- a/base/serial/impl/psb_z_mat_impl.F90 +++ b/base/serial/impl/psb_z_mat_impl.F90 @@ -1263,6 +1263,56 @@ subroutine psb_z_split_nd(a,n_rows,n_cols,info) end subroutine psb_z_split_nd +subroutine psb_z_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_z_mat_mod, psb_protect_name => psb_z_merge_nd + implicit none + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + type(psb_z_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_merge_nd + subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod From 025350a361dbfc8775dd1fa4028d79aa3567ebee Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 23 Apr 2024 14:06:27 +0200 Subject: [PATCH 080/110] Make sure realloc is always called with size >0 --- base/serial/impl/psb_c_csc_impl.F90 | 8 ++++---- base/serial/impl/psb_c_csr_impl.F90 | 8 ++++---- base/serial/impl/psb_d_csc_impl.F90 | 8 ++++---- base/serial/impl/psb_d_csr_impl.F90 | 8 ++++---- base/serial/impl/psb_s_csc_impl.F90 | 8 ++++---- base/serial/impl/psb_s_csr_impl.F90 | 8 ++++---- base/serial/impl/psb_z_csc_impl.F90 | 8 ++++---- base/serial/impl/psb_z_csr_impl.F90 | 8 ++++---- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/base/serial/impl/psb_c_csc_impl.F90 b/base/serial/impl/psb_c_csc_impl.F90 index 54332d06..fe7227dd 100644 --- a/base/serial/impl/psb_c_csc_impl.F90 +++ b/base/serial/impl/psb_c_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_c_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_lc_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index 6c21f639..9354098b 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_c_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -6281,7 +6281,7 @@ subroutine psb_lc_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_d_csc_impl.F90 b/base/serial/impl/psb_d_csc_impl.F90 index 1761b051..d9fa2874 100644 --- a/base/serial/impl/psb_d_csc_impl.F90 +++ b/base/serial/impl/psb_d_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_d_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_ld_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index 9f1d509c..bc0efcc9 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_d_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_d_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -6281,7 +6281,7 @@ subroutine psb_ld_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_s_csc_impl.F90 b/base/serial/impl/psb_s_csc_impl.F90 index a66b7dc0..c135c9ef 100644 --- a/base/serial/impl/psb_s_csc_impl.F90 +++ b/base/serial/impl/psb_s_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_s_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_ls_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index a4e1ab82..ba70e021 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_s_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_s_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -6281,7 +6281,7 @@ subroutine psb_ls_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_z_csc_impl.F90 b/base/serial/impl/psb_z_csc_impl.F90 index e5516bd9..becb7003 100644 --- a/base/serial/impl/psb_z_csc_impl.F90 +++ b/base/serial/impl/psb_z_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_z_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_lz_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index 28ac121e..23a4fb84 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_z_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_z_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -6281,7 +6281,7 @@ subroutine psb_lz_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat call b%set_nzeros(a%get_nzeros()) From 5e2e1e34fde0e9f82105820878a58b6ce890aa62 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 25 Apr 2024 18:59:24 +0200 Subject: [PATCH 081/110] Introduce set_host() in inner_vect_sv --- base/serial/impl/psb_c_base_mat_impl.F90 | 5 +++-- base/serial/impl/psb_d_base_mat_impl.F90 | 5 +++-- base/serial/impl/psb_s_base_mat_impl.F90 | 5 +++-- base/serial/impl/psb_z_base_mat_impl.F90 | 5 +++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index 17f2cdc8..2a2ed693 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -2060,8 +2060,6 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2161,8 +2159,11 @@ subroutine psb_c_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 69112529..73c2c920 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -2060,8 +2060,6 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2161,8 +2159,11 @@ subroutine psb_d_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90 index 4a99a684..1c52d982 100644 --- a/base/serial/impl/psb_s_base_mat_impl.F90 +++ b/base/serial/impl/psb_s_base_mat_impl.F90 @@ -2060,8 +2060,6 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2161,8 +2159,11 @@ subroutine psb_s_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90 index 404027c5..ef7b2dd6 100644 --- a/base/serial/impl/psb_z_base_mat_impl.F90 +++ b/base/serial/impl/psb_z_base_mat_impl.F90 @@ -2060,8 +2060,6 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2161,8 +2159,11 @@ subroutine psb_z_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ From d444a12879c02f7ef2505c67e49b7d1ff0ce6af6 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 26 Apr 2024 11:35:44 +0200 Subject: [PATCH 082/110] Condition call to x%sync() in vect_mv --- base/serial/impl/psb_c_base_mat_impl.F90 | 4 ++-- base/serial/impl/psb_d_base_mat_impl.F90 | 4 ++-- base/serial/impl/psb_s_base_mat_impl.F90 | 4 ++-- base/serial/impl/psb_z_base_mat_impl.F90 | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index 2a2ed693..f4bc43cc 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_c_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_c_base_vect_mv diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 73c2c920..1a8dc084 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_d_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_d_base_vect_mv diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90 index 1c52d982..0b47d472 100644 --- a/base/serial/impl/psb_s_base_mat_impl.F90 +++ b/base/serial/impl/psb_s_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_s_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_s_base_vect_mv diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90 index ef7b2dd6..2d97f698 100644 --- a/base/serial/impl/psb_z_base_mat_impl.F90 +++ b/base/serial/impl/psb_z_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_z_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_z_base_vect_mv From e72c0f0bf96c3fc56c83620230caac5d1463ee7c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 29 Apr 2024 15:04:23 +0200 Subject: [PATCH 083/110] Fix OMP impl of sparse-sparse product --- base/serial/impl/psb_c_csr_impl.F90 | 54 +++++++++++++++++------------ base/serial/impl/psb_d_csr_impl.F90 | 54 +++++++++++++++++------------ base/serial/impl/psb_s_csr_impl.F90 | 54 +++++++++++++++++------------ base/serial/impl/psb_z_csr_impl.F90 | 54 +++++++++++++++++------------ 4 files changed, 128 insertions(+), 88 deletions(-) diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index 9354098b..276d3d1c 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index bc0efcc9..a0aaeeee 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index ba70e021..9d5dc145 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index 23a4fb84..7f11c3bd 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d From b99aa7a90f8b369019e985d3090d8ab2d13d63f0 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 29 Apr 2024 15:07:49 +0200 Subject: [PATCH 084/110] Switch off OMP in HASH g2l_ins --- base/modules/desc/psb_hash_map_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 058dbb8d..752e0baf 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -683,7 +683,7 @@ contains mglob = idxmap%get_gr() nrow = idxmap%get_lr() !write(0,*) me,name,' before loop ',psb_errstatus_fatal() -#ifdef OPENMP +#if 0 && defined(OPENMP) !call OMP_init_lock(ins_lck) if (idxmap%is_bld()) then From 188dee6842d79fa70ec3a435ef03120edc5d3217 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 04:58:31 -0400 Subject: [PATCH 085/110] Add indx_map%inc_lc() method --- base/modules/desc/psb_indx_map_mod.f90 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.f90 index 7753db23..422be5f3 100644 --- a/base/modules/desc/psb_indx_map_mod.f90 +++ b/base/modules/desc/psb_indx_map_mod.f90 @@ -158,6 +158,7 @@ module psb_indx_map_mod procedure, pass(idxmap) :: set_lr => base_set_lr procedure, pass(idxmap) :: set_lc => base_set_lc + procedure, pass(idxmap) :: inc_lc => base_inc_lc procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy @@ -235,7 +236,7 @@ module psb_indx_map_mod & base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,& & base_get_mpic, base_sizeof, base_set_null, & & base_set_grl, base_set_gcl, & - & base_set_lr, base_set_lc, base_set_ctxt,& + & base_set_lr, base_set_lc, base_inc_lc, base_set_ctxt,& & base_set_mpic, base_get_fmt, base_asb, base_free,& & base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,& & base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,& @@ -573,6 +574,14 @@ contains idxmap%local_cols = val end subroutine base_set_lc + subroutine base_inc_lc(idxmap) + implicit none + class(psb_indx_map), intent(inout) :: idxmap + !$omp atomic + idxmap%local_cols = idxmap%local_cols + 1 + !$omp end atomic + end subroutine base_inc_lc + subroutine base_set_p_adjcncy(idxmap,val) use psb_realloc_mod use psb_sort_mod From fa86c914111ef02c0b10fa41d850bba32e8d2b52 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 05:04:48 -0400 Subject: [PATCH 086/110] Fix OpenMP version of hash_map and hash --- base/modules/desc/psb_hash_map_mod.F90 | 504 ++++++++++--------------- base/modules/desc/psb_hash_mod.F90 | 103 ++--- 2 files changed, 261 insertions(+), 346 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 058dbb8d..2ccb9b28 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -363,6 +363,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is if (mask(i)) then ip = idx(i) @@ -388,7 +391,7 @@ contains endif end if enddo - + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 @@ -404,6 +407,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is ip = idx(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -427,14 +433,12 @@ contains idx(i) = lip endif enddo - + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 info = -1 - end if - end if end subroutine hash_g2lv1 @@ -493,6 +497,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is if (mask(i)) then ip = idxin(i) @@ -518,6 +525,7 @@ contains endif end if enddo + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 @@ -533,6 +541,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is ip = idxin(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -556,14 +567,12 @@ contains idxout(i) = lip endif enddo - + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 info = -1 - end if - end if end subroutine hash_g2lv2 @@ -649,7 +658,7 @@ contains & err_act integer(psb_lpk_) :: mglob, ip, nxt, tlip type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: me, np + integer(psb_ipk_) :: me, np,ith character(len=20) :: name,ch_err logical, allocatable :: mask_(:) !!$ logical :: use_openmp = .true. @@ -683,363 +692,243 @@ contains mglob = idxmap%get_gr() nrow = idxmap%get_lr() !write(0,*) me,name,' before loop ',psb_errstatus_fatal() -#ifdef OPENMP - !call OMP_init_lock(ins_lck) - - if (idxmap%is_bld()) then - - isLoopValid = .true. - ncol = idxmap%get_lc() - if (present(mask)) then - mask_ = mask - else - allocate(mask_(size(idx))) - mask_ = .true. - end if +#if defined(OPENMP) + isLoopValid = .true. + if (idxmap%is_bld()) then if (present(lidx)) then - if (present(mask)) then - !$omp critical(hash_g2l_ins) - - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz,lidx) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) + if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle - if (mask(i)) then - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + if (mask(i)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob) ) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - if (lip < 0) then - !call OMP_set_lock(ins_lck) - - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing). - ncol = idxmap%get_lc() - nxt = lidx(i) - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - - if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - ! Index not found - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - lip = tlip - - - if (info >= 0) then - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) - - if (info /= psb_success_) then - !write(0,*) 'Error spot 1' - call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - - isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) - end if - end if + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + tlip = lip + nxt = lidx(i) + if (nxt <= nrow) then + idx(i) = -1 + else + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) + if (lip > 0) then + idx(i) = lip else - idx(i) = -1 + call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + & pad=-1_psb_lpk_,addsz=laddsz) + if (info /= psb_success_) then + info=1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='psb_ensure_size',i_err=(/info/)) + isLoopValid = .false. + end if + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(max(ncol,nxt)) + endif + idx(i) = lip + info = psb_success_ + else + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. + end if end if - !call OMP_unset_lock(ins_lck) - end if - else - idx(i) = lip + endif + !$omp end critical(hash_g2l_ins) end if else idx(i) = -1 end if + enddo + !$omp end parallel do - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) - - if (.not. isLoopValid) then - goto 9999 - end if - else - !$omp critical(hash_g2l_ins) + else if (.not.present(mask)) then - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz,lidx) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob) ) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - if (lip < 0) then - !call OMP_set_lock(ins_lck) - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing). - ncol = idxmap%get_lc() - nxt = lidx(i) - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - - if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - lip = tlip - - if (info >= 0) then - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) - - if (info /= psb_success_) then - !write(0,*) 'Error spot 2' - call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - - isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) - end if - end if + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + tlip = lip + nxt = lidx(i) + if (nxt <= nrow) then + idx(i) = -1 + else + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) + if (lip > 0) then + idx(i) = lip else - idx(i) = -1 + call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + & pad=-1_psb_lpk_,addsz=laddsz) + if (info /= psb_success_) then + info=1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='psb_ensure_size',i_err=(/info/)) + isLoopValid = .false. + end if + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(max(ncol,nxt)) + endif + idx(i) = lip + info = psb_success_ + else + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. + end if end if - !call OMP_unset_lock(ins_lck) - end if - else - idx(i) = lip + endif + !$omp end critical(hash_g2l_ins) end if - - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) - - if (.not. isLoopValid) then - goto 9999 - end if + enddo + !$omp end parallel do end if + else if (.not.present(lidx)) then - if(present(mask)) then - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) - !$omp critical(hash_g2l_ins) + if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle if (mask(i)) then - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob)) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) - ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. - !write(0,*) me,name,' b hic 1 ',psb_errstatus_fatal() + ncol = idxmap%get_lc() call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - !write(0,*) me,name,' a hic 1 ',psb_errstatus_fatal() - if (lip < 0) then - !call OMP_set_lock(ins_lck) - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing, so this lock is relatively rare). - ncol = idxmap%get_lc() - nxt = ncol + 1 - !write(0,*) me,name,' b hic 2 ',psb_errstatus_fatal() - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - !write(0,*) me,name,' a hic 2 ',psb_errstatus_fatal() + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + ncol = idxmap%get_lc() + nxt = ncol + 1 + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - ! Index not found - !write(0,*) me,name,' b hsik ',psb_errstatus_fatal() - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - if (psb_errstatus_fatal()) write(0,*) me,name,' a hsik ',info,omp_get_thread_num() - !write(0,*) me,name,' a hsik ',psb_errstatus_fatal() - lip = tlip - - if (info >= 0) then - !write(0,*) 'Error before spot 3', info - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& + idx(i) = lip + else + call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) - if (psb_errstatus_fatal()) write(0,*) me,name,' a esz ',info,omp_get_thread_num() if (info /= psb_success_) then - !write(0,*) 'Error spot 3', info + info=1 call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - + & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) end if - end if + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) + endif + idx(i) = lip + info = psb_success_ else - idx(i) = -1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. end if - !call OMP_unset_lock(ins_lck) end if - else - idx(i) = lip + !$omp end critical(hash_g2l_ins) end if else idx(i) = -1 end if + enddo + !$omp end parallel do - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) + else if (.not.present(mask)) then - if (.not. isLoopValid) then - goto 9999 - end if - else - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) - !$omp critical(hash_g2l_ins) + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob)) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) - ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. + ncol = idxmap%get_lc() call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - if (lip < 0) then - !call OMP_set_lock(ins_lck) - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing). - ncol = idxmap%get_lc() - nxt = ncol + 1 - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + ncol = idxmap%get_lc() + nxt = ncol + 1 + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - ! Index not found - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - lip = tlip - - if (info >= 0) then - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& + idx(i) = lip + else + call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) - if (info /= psb_success_) then - !write(0,*) 'Error spot 4' + info=1 call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - + & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) end if - end if + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) + endif + idx(i) = lip + info = psb_success_ else - idx(i) = -1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. end if - !call OMP_unset_lock(ins_lck) end if - - else - idx(i) = lip + !$omp end critical(hash_g2l_ins) end if - - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) - - if (.not. isLoopValid) then - goto 9999 - end if - + enddo + !$omp end parallel do end if end if else @@ -1047,7 +936,7 @@ contains idx = -1 info = -1 end if - !call OMP_destroy_lock(ins_lck) + if (.not. isLoopValid) goto 9999 #else !!$ else if (.not.use_openmp) then isLoopValid = .true. @@ -1066,13 +955,13 @@ contains call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) if (lip < 0) then - tlip = lip nxt = lidx(i) if (nxt <= nrow) then idx(i) = -1 cycle endif call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == tlip) then ncol = max(ncol,nxt) @@ -1747,6 +1636,9 @@ contains ! for a width of psb_hash_bits ! if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,glb_lc,nrm,mask) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n if (mask(i)) then key = x(i) @@ -1784,7 +1676,11 @@ contains end if end if end do + !$omp end parallel do else + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,glb_lc,nrm) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n key = x(i) ih = iand(key,hashmask) @@ -1820,6 +1716,7 @@ contains x(i) = tmp end if end do + !$omp end parallel do end if end subroutine hash_inner_cnv1 @@ -1842,6 +1739,9 @@ contains ! for a width of psb_hash_bits ! if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n if (mask(i)) then key = x(i) @@ -1882,9 +1782,12 @@ contains end if end if end do - + !$omp end parallel do else + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n key = x(i) ih = iand(key,hashmask) @@ -1923,6 +1826,7 @@ contains y(i) = tmp end if end do + !$omp end parallel do end if end subroutine hash_inner_cnv2 diff --git a/base/modules/desc/psb_hash_mod.F90 b/base/modules/desc/psb_hash_mod.F90 index eb5556a2..18b1142d 100644 --- a/base/modules/desc/psb_hash_mod.F90 +++ b/base/modules/desc/psb_hash_mod.F90 @@ -383,12 +383,12 @@ contains integer(psb_lpk_), intent(out) :: val integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: hsize,hmask, hk, hd + integer(psb_ipk_) :: hsize,hmask, hk, hd, i + logical :: redo info = HashOK hsize = hash%hsize hmask = hash%hmask - val = -1 hk = iand(psb_hashval(key),hmask) if (hk == 0) then hd = 1 @@ -400,56 +400,57 @@ contains info = HashOutOfMemory return end if - + val = -1 + !$omp atomic hash%nsrch = hash%nsrch + 1 + !$omp end atomic do + !$omp atomic hash%nacc = hash%nacc + 1 + !$omp end atomic if (hash%table(hk,1) == key) then val = hash%table(hk,2) info = HashDuplicate - !write(0,*) 'In searchinskey 1 : ', info, HashDuplicate return end if + redo = .false. !$omp critical(hashsearchins) - if (hash%table(hk,1) == key) then - val = hash%table(hk,2) - info = HashDuplicate - else - if (hash%table(hk,1) == HashFreeEntry) then - if (hash%nk == hash%hsize -1) then - ! - ! Note: because of the way we allocate things at CDALL - ! time this is really unlikely; if we get here, we - ! have at least as many halo indices as internals, which - ! means we're already in trouble. But we try to keep going. - ! - call psb_hash_realloc(hash,info) - if (info /= HashOk) then - info = HashOutOfMemory - !return - else - call psb_hash_searchinskey(key,val,nextval,hash,info) - !return - end if + if (hash%table(hk,1) == HashFreeEntry) then + if (hash%nk == hash%hsize -1) then + ! + ! Note: because of the way we allocate things at CDALL + ! time this is really unlikely; if we get here, we + ! have at least as many halo indices as internals, which + ! means we're already in trouble. But we try to keep going. + ! + call psb_hash_realloc(hash,info) + if (info /= HashOk) then + info = HashOutOfMemory else - hash%nk = hash%nk + 1 - hash%table(hk,1) = key - hash%table(hk,2) = nextval - val = nextval - !return + redo = .true. end if + else + hash%nk = hash%nk + 1 + hash%table(hk,1) = key + hash%table(hk,2) = nextval + val = nextval + info = HashOk end if + else if (hash%table(hk,1) == key) then + val = hash%table(hk,2) + info = HashDuplicate + else + info = HashNotFound end if !$omp end critical(hashsearchins) - if (info /= HashOk) then - write(0,*) 'In searchinskey 2: ', info + if (redo) then + call psb_hash_searchinskey(key,val,nextval,hash,info) return end if - if (val > 0) return + if (val > 0) exit hk = hk - hd if (hk < 0) hk = hk + hsize end do - !write(0,*) 'In searchinskey 3: ', info end subroutine psb_hash_lsearchinskey recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info) @@ -459,10 +460,11 @@ contains integer(psb_ipk_) :: hsize,hmask, hk, hd logical :: redo + info = HashOK hsize = hash%hsize hmask = hash%hmask - + hk = iand(psb_hashval(key),hmask) if (hk == 0) then hd = 1 @@ -475,17 +477,22 @@ contains return end if val = -1 + val = -1 + !$omp atomic hash%nsrch = hash%nsrch + 1 + !$omp end atomic do + !$omp atomic hash%nacc = hash%nacc + 1 + !$omp end atomic if (hash%table(hk,1) == key) then val = hash%table(hk,2) info = HashDuplicate return end if redo = .false. - !$OMP CRITICAL - if (hash%table(hk,1) == HashFreeEntry) then + !$omp critical(hashsearchins) + if (hash%table(hk,1) == HashFreeEntry) then if (hash%nk == hash%hsize -1) then ! ! Note: because of the way we allocate things at CDALL @@ -496,24 +503,28 @@ contains call psb_hash_realloc(hash,info) if (info /= HashOk) then info = HashOutOfMemory - !return else redo = .true. -!!$ call psb_hash_searchinskey(key,val,nextval,hash,info) -!!$ return end if else hash%nk = hash%nk + 1 hash%table(hk,1) = key hash%table(hk,2) = nextval val = nextval - !return + info = HashOk end if + else if (hash%table(hk,1) == key) then + val = hash%table(hk,2) + info = HashDuplicate + else + info = HashNotFound end if - !$OMP END CRITICAL - if (redo) call psb_hash_searchinskey(key,val,nextval,hash,info) - if (info /= HashOk) return - if (val > 0) return + !$omp end critical(hashsearchins) + if (redo) then + call psb_hash_searchinskey(key,val,nextval,hash,info) + return + end if + if (val > 0) exit hk = hk - hd if (hk < 0) hk = hk + hsize end do @@ -551,7 +562,7 @@ contains end if if (hash%table(hk,1) == HashFreeEntry) then val = HashFreeEntry -! !$ info = HashNotFound + info = HashNotFound return end if hk = hk - hd @@ -591,7 +602,7 @@ contains end if if (hash%table(hk,1) == HashFreeEntry) then val = HashFreeEntry -! !$ info = HashNotFound + info = HashNotFound return end if hk = hk - hd From 98a9005602469c2e711486984102a152e5dcb1a8 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 05:50:45 -0400 Subject: [PATCH 087/110] Further advances on OpenMP versions of various index maps. --- base/modules/desc/psb_gen_block_map_mod.F90 | 48 +++++++++++++++------ base/modules/desc/psb_hash_map_mod.F90 | 10 ++++- base/modules/desc/psb_list_map_mod.F90 | 35 +++++++++++++-- 3 files changed, 75 insertions(+), 18 deletions(-) diff --git a/base/modules/desc/psb_gen_block_map_mod.F90 b/base/modules/desc/psb_gen_block_map_mod.F90 index f0c433e0..650bb430 100644 --- a/base/modules/desc/psb_gen_block_map_mod.F90 +++ b/base/modules/desc/psb_gen_block_map_mod.F90 @@ -215,7 +215,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,owned_,info) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -229,9 +231,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,owned_,info) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%min_glob_row + idx(i) - 1 @@ -243,7 +247,7 @@ contains info = -1 end if end do - + !$omp end parallel do end if end subroutine block_ll2gv1 @@ -277,7 +281,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap,owned_,info) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -291,9 +297,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap,owned_,info) & + !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then idxout(i) = idxmap%min_glob_row + idxin(i) - 1 @@ -305,7 +313,7 @@ contains info = -1 end if end do - + !$omp end parallel do end if if (is > im) then @@ -392,6 +400,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,owned_) & + !$omp private(i,nv,tidx) do i=1, is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and. & @@ -408,7 +419,11 @@ contains end if end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,owned_) & + !$omp private(i,ip,lip,tidx,info) do i=1,is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and.& @@ -424,8 +439,8 @@ contains end if end if end do + !$omp end parallel do else -!!$ write(0,*) 'Block status: invalid ',idxmap%get_state() idx(1:is) = -1 info = -1 end if @@ -433,6 +448,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,idxmap,owned_) & + !$omp private(i,nv,tidx) do i=1, is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -447,8 +465,11 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,idxmap,owned_) & + !$omp private(i,ip,lip,tidx,info) do i=1,is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -462,6 +483,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -953,7 +975,9 @@ contains end if info = psb_success_ else - info = -5 + write(0,*) 'From has_search_ins:',info,ip,lip,nxt,& + & idxmap%min_glob_row,idxmap%max_glob_row + info = -6 return end if idxout(i) = lip + idxmap%local_rows @@ -1131,7 +1155,7 @@ contains idxmap%global_cols = ntot idxmap%local_rows = nl idxmap%local_cols = nl - idxmap%ctxt = ctxt + idxmap%ctxt = ctxt idxmap%state = psb_desc_bld_ idxmap%mpic = psb_get_mpi_comm(ctxt) idxmap%min_glob_row = vnl(iam)+1 diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 2ccb9b28..96c917d4 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -221,6 +221,9 @@ contains if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -233,9 +236,12 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -246,7 +252,7 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do end if end subroutine hash_l2gv1 diff --git a/base/modules/desc/psb_list_map_mod.F90 b/base/modules/desc/psb_list_map_mod.F90 index 6b61cf52..913145da 100644 --- a/base/modules/desc/psb_list_map_mod.F90 +++ b/base/modules/desc/psb_list_map_mod.F90 @@ -178,7 +178,10 @@ contains end if if (present(mask)) then - + + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then @@ -191,9 +194,12 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -204,7 +210,8 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do + end if end subroutine list_ll2gv1 @@ -298,6 +305,9 @@ contains if (present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,owned_) & + !$omp private(i,ix) do i=1,is if (mask(i)) then if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -309,6 +319,7 @@ contains end if end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -317,6 +328,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,idxmap,owned_) & + !$omp private(i,ix) do i=1, is if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idx(i)) @@ -326,6 +340,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -365,6 +380,9 @@ contains if (present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idxin,idxout,idxmap,owned_) & + !$omp private(i,ix) do i=1,is if (mask(i)) then if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -376,6 +394,7 @@ contains end if end if end do + !$omp end parallel do else idxout(1:is) = -1 info = -1 @@ -384,6 +403,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idxin,idxout,idxmap,owned_) & + !$omp private(i,ix) do i=1, is if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idxin(i)) @@ -393,6 +415,7 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do else idxout(1:is) = -1 info = -1 @@ -541,6 +564,10 @@ contains else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,laddsz,lidx) & + !$omp private(i,ix,info) + ! $ o m p reduction(.AND.:isLoopValid) do i=1, is if (info /= 0) cycle if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -579,8 +606,8 @@ contains idx(i) = -1 end if end do + !$omp end parallel do end if - else if (.not.present(lidx)) then if (present(mask)) then From 773b79e7bc7437ad270c8b8459c88b2fe1a927c4 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 06:11:15 -0400 Subject: [PATCH 088/110] OpenMP in repl_map --- base/modules/desc/psb_repl_map_mod.F90 | 94 ++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 14 deletions(-) diff --git a/base/modules/desc/psb_repl_map_mod.F90 b/base/modules/desc/psb_repl_map_mod.F90 index fe51b7b1..738d6de2 100644 --- a/base/modules/desc/psb_repl_map_mod.F90 +++ b/base/modules/desc/psb_repl_map_mod.F90 @@ -169,7 +169,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -179,9 +181,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then ! do nothing @@ -189,7 +193,7 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do end if end subroutine repl_l2gv1 @@ -223,7 +227,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -233,9 +239,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap) & + !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then idxout(i) = idxin(i) @@ -243,7 +251,7 @@ contains idxout(i) = -1 end if end do - + !$omp end parallel do end if if (is > im) info = -3 @@ -324,6 +332,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap) & + !$omp private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -333,7 +344,11 @@ contains end if end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap) & + !$omp private(i) do i=1,is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -344,6 +359,7 @@ contains end if end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -352,6 +368,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap) & + !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -359,7 +378,11 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap) & + !$omp private(i) do i=1,is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -367,6 +390,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -409,6 +433,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -418,7 +445,11 @@ contains end if end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap) & + !$omp private(i) do i=1,im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -428,6 +459,7 @@ contains end if end if end do + !$omp end parallel do else idxout(1:im) = -1 info = -1 @@ -436,6 +468,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap) & + !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -443,7 +478,11 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap) & + !$omp private(i) do i=1,im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -451,6 +490,7 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do else idxout(1:im) = -1 info = -1 @@ -557,6 +597,9 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,lidx,is,idxmap) & + !$omp private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -566,9 +609,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,lidx,is,idxmap) & + !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -576,9 +621,13 @@ contains idx(i) = -1 end if end do + !$omp end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,is,idxmap) & + !$omp private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -588,8 +637,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,is,idxmap) & + !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -597,6 +649,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do end if end if else @@ -644,6 +697,9 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -653,9 +709,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -663,9 +721,13 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -675,8 +737,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -684,6 +749,7 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do end if end if else From e711c53fab67b1f1e87ad933704cff97e845a9aa Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 07:07:18 -0400 Subject: [PATCH 089/110] Make sure we compile when LPK /= IPK --- base/modules/desc/psb_hash_map_mod.F90 | 36 ++++++++++++++++---------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 09cde3d4..952aef30 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -733,10 +733,11 @@ contains if (lip > 0) then idx(i) = lip else - call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == lip) then - call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) if (info /= psb_success_) then info=1 @@ -745,7 +746,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(max(ncol,nxt)) + ncol = max(ncol,nxt) + call idxmap%set_lc(ncol) endif idx(i) = lip info = psb_success_ @@ -794,10 +796,11 @@ contains if (lip > 0) then idx(i) = lip else - call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == lip) then - call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) if (info /= psb_success_) then info=1 @@ -806,7 +809,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(max(ncol,nxt)) + ncol = max(ncol,nxt) + call idxmap%set_lc(ncol) endif idx(i) = lip info = psb_success_ @@ -852,10 +856,11 @@ contains if (lip > 0) then idx(i) = lip else - call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == lip) then - call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) if (info /= psb_success_) then info=1 @@ -863,8 +868,9 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(nxt) + ncol = nxt + idxmap%loc_to_glob(ncol) = ip + call idxmap%set_lc(ncol) endif idx(i) = lip info = psb_success_ @@ -909,10 +915,11 @@ contains if (lip > 0) then idx(i) = lip else - call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == lip) then - call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) if (info /= psb_success_) then info=1 @@ -920,8 +927,9 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(nxt) + ncol = nxt + idxmap%loc_to_glob(ncol) = ip + call idxmap%set_lc(ncol) endif idx(i) = lip info = psb_success_ From 7dc64692cc5bc75b70e4e22eb013b9a4fdb80a02 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 8 May 2024 14:46:27 +0200 Subject: [PATCH 090/110] Fix for OpenMP runs in hash_map_mod --- base/modules/desc/psb_hash_map_mod.F90 | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 952aef30..8695fb93 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -660,7 +660,7 @@ contains logical, intent(in), optional :: mask(:) integer(psb_ipk_), intent(in), optional :: lidx(:) - integer(psb_ipk_) :: i, is, lip, nrow, ncol, & + integer(psb_ipk_) :: i, is, lip, nrow, ncol, inxt,& & err_act integer(psb_lpk_) :: mglob, ip, nxt, tlip type(psb_ctxt_type) :: ctxt @@ -706,7 +706,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp private(i,ip,lip,tlip,nxt,inxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then @@ -746,8 +746,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - ncol = max(ncol,nxt) - call idxmap%set_lc(ncol) + inxt = max(ncol,nxt) + call idxmap%set_lc(inxt) endif idx(i) = lip info = psb_success_ @@ -770,7 +770,7 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp private(i,ip,lip,tlip,nxt,inxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) @@ -809,8 +809,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - ncol = max(ncol,nxt) - call idxmap%set_lc(ncol) + inxt = max(ncol,nxt) + call idxmap%set_lc(inxt) endif idx(i) = lip info = psb_success_ @@ -832,7 +832,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp private(i,ip,lip,tlip,nxt,inxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then @@ -868,9 +868,9 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - ncol = nxt - idxmap%loc_to_glob(ncol) = ip - call idxmap%set_lc(ncol) + inxt = nxt + idxmap%loc_to_glob(inxt) = ip + call idxmap%set_lc(inxt) endif idx(i) = lip info = psb_success_ @@ -892,7 +892,7 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp private(i,ip,lip,tlip,nxt,inxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) @@ -927,9 +927,9 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - ncol = nxt - idxmap%loc_to_glob(ncol) = ip - call idxmap%set_lc(ncol) + inxt = nxt + idxmap%loc_to_glob(inxt) = ip + call idxmap%set_lc(inxt) endif idx(i) = lip info = psb_success_ From 7ec394ce1cc946697b134f33a54315b0d5921c10 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 9 May 2024 11:29:23 +0200 Subject: [PATCH 091/110] Rename indx_map_mod and put SET_LR/C under ifdef --- ..._indx_map_mod.f90 => psb_indx_map_mod.F90} | 48 +++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) rename base/modules/desc/{psb_indx_map_mod.f90 => psb_indx_map_mod.F90} (97%) diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.F90 similarity index 97% rename from base/modules/desc/psb_indx_map_mod.f90 rename to base/modules/desc/psb_indx_map_mod.F90 index 422be5f3..fa3e5a69 100644 --- a/base/modules/desc/psb_indx_map_mod.f90 +++ b/base/modules/desc/psb_indx_map_mod.F90 @@ -153,12 +153,25 @@ module psb_indx_map_mod procedure, pass(idxmap) :: set_gci => base_set_gci procedure, pass(idxmap) :: set_grl => base_set_grl procedure, pass(idxmap) :: set_gcl => base_set_gcl +#if defined(IPK4) && defined(LPK8) + generic, public :: set_gr => set_grl, set_gri + generic, public :: set_gc => set_gcl, set_gci +#else generic, public :: set_gr => set_grl generic, public :: set_gc => set_gcl - - procedure, pass(idxmap) :: set_lr => base_set_lr - procedure, pass(idxmap) :: set_lc => base_set_lc +#endif + procedure, pass(idxmap) :: set_lri => base_set_lri + procedure, pass(idxmap) :: set_lrl => base_set_lrl + procedure, pass(idxmap) :: set_lci => base_set_lci + procedure, pass(idxmap) :: set_lcl => base_set_lcl procedure, pass(idxmap) :: inc_lc => base_inc_lc +#if defined(IPK4) && defined(LPK8) + generic, public :: set_lr => set_lrl, set_lri + generic, public :: set_lc => set_lcl, set_lci +#else + generic, public :: set_lr => set_lri + generic, public :: set_lc => set_lci +#endif procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy @@ -236,7 +249,8 @@ module psb_indx_map_mod & base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,& & base_get_mpic, base_sizeof, base_set_null, & & base_set_grl, base_set_gcl, & - & base_set_lr, base_set_lc, base_inc_lc, base_set_ctxt,& + & base_set_lri, base_set_lci, base_set_lrl, base_set_lcl, & + & base_inc_lc, base_set_ctxt,& & base_set_mpic, base_get_fmt, base_asb, base_free,& & base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,& & base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,& @@ -558,21 +572,39 @@ contains idxmap%global_cols = val end subroutine base_set_gcl - subroutine base_set_lr(idxmap,val) + subroutine base_set_lri(idxmap,val) implicit none class(psb_indx_map), intent(inout) :: idxmap integer(psb_ipk_), intent(in) :: val idxmap%local_rows = val - end subroutine base_set_lr + end subroutine base_set_lri - subroutine base_set_lc(idxmap,val) + subroutine base_set_lci(idxmap,val) implicit none class(psb_indx_map), intent(inout) :: idxmap integer(psb_ipk_), intent(in) :: val + !$omp critical + idxmap%local_cols = val + !$omp end critical + end subroutine base_set_lci + + subroutine base_set_lrl(idxmap,val) + implicit none + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_lpk_), intent(in) :: val + idxmap%local_rows = val + end subroutine base_set_lrl + + subroutine base_set_lcl(idxmap,val) + implicit none + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_lpk_), intent(in) :: val + !$omp critical idxmap%local_cols = val - end subroutine base_set_lc + !$omp end critical + end subroutine base_set_lcl subroutine base_inc_lc(idxmap) implicit none From d8ed01218dcf12c7698b6f2471cebcb05c174055 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 9 May 2024 12:30:06 +0200 Subject: [PATCH 092/110] Cleanup hash_map using new indx_map%set_lc --- base/modules/desc/psb_hash_map_mod.F90 | 28 ++++++++++++-------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 8695fb93..c3d833c6 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -660,7 +660,7 @@ contains logical, intent(in), optional :: mask(:) integer(psb_ipk_), intent(in), optional :: lidx(:) - integer(psb_ipk_) :: i, is, lip, nrow, ncol, inxt,& + integer(psb_ipk_) :: i, is, lip, nrow, ncol,& & err_act integer(psb_lpk_) :: mglob, ip, nxt, tlip type(psb_ctxt_type) :: ctxt @@ -706,7 +706,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,inxt,info) & + !$omp private(i,ip,lip,tlip,nxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then @@ -746,8 +746,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - inxt = max(ncol,nxt) - call idxmap%set_lc(inxt) + nxt = max(ncol,nxt) + call idxmap%set_lc(nxt) endif idx(i) = lip info = psb_success_ @@ -770,7 +770,7 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,inxt,info) & + !$omp private(i,ip,lip,tlip,nxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) @@ -809,8 +809,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - inxt = max(ncol,nxt) - call idxmap%set_lc(inxt) + nxt = max(ncol,nxt) + call idxmap%set_lc(nxt) endif idx(i) = lip info = psb_success_ @@ -832,7 +832,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,inxt,info) & + !$omp private(i,ip,lip,tlip,nxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then @@ -868,9 +868,8 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - inxt = nxt - idxmap%loc_to_glob(inxt) = ip - call idxmap%set_lc(inxt) + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) endif idx(i) = lip info = psb_success_ @@ -892,7 +891,7 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,inxt,info) & + !$omp private(i,ip,lip,tlip,nxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) @@ -927,9 +926,8 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - inxt = nxt - idxmap%loc_to_glob(inxt) = ip - call idxmap%set_lc(inxt) + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) endif idx(i) = lip info = psb_success_ From d01b8145c6f4de5b3beb4ebabc88c6ec69e52bfe Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 9 May 2024 17:08:45 +0200 Subject: [PATCH 093/110] Fix cuda makefile dependencies --- cuda/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/Makefile b/cuda/Makefile index 49c5f0cb..33787f4e 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -64,7 +64,7 @@ spgpulib: $(MAKE) -C spgpu lib -dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o +diagdev_mod.o dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o From a613e963dbf823664a4aa90413931b12e8584705 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 10 May 2024 10:10:54 +0200 Subject: [PATCH 094/110] First step in fix for coo_impl on OpenMP --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 5c90e287..84517ead 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4205,7 +4205,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index f6a173d1..1b122fd8 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4205,7 +4205,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 4c12d8fc..3df01ba6 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4205,7 +4205,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 44ee89b5..356914de 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4205,7 +4205,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) From ecccb1391437fea6dc6984129e79e32140293091 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 10 May 2024 13:53:35 +0200 Subject: [PATCH 095/110] Fix COO fix_coo_inner_rowmajor not to overflow on integers. --- base/serial/impl/psb_c_coo_impl.F90 | 109 +++++++++++++++------------- base/serial/impl/psb_d_coo_impl.F90 | 109 +++++++++++++++------------- base/serial/impl/psb_s_coo_impl.F90 | 109 +++++++++++++++------------- base/serial/impl/psb_z_coo_impl.F90 | 109 +++++++++++++++------------- 4 files changed, 240 insertions(+), 196 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 84517ead..cc3c6842 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4268,7 +4268,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4302,10 +4302,13 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if if (use_buffers) then - iaux(:) = 0 #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnr = maxval(iaux(1:nr)) #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4342,22 +4347,21 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4386,67 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(maxnr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4564,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 1b122fd8..e523d83d 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4268,7 +4268,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4302,10 +4302,13 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if if (use_buffers) then - iaux(:) = 0 #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnr = maxval(iaux(1:nr)) #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4342,22 +4347,21 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4386,67 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(maxnr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4564,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 3df01ba6..b46731fc 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4268,7 +4268,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4302,10 +4302,13 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if if (use_buffers) then - iaux(:) = 0 #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnr = maxval(iaux(1:nr)) #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4342,22 +4347,21 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4386,67 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(maxnr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4564,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 356914de..a7e527e5 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4268,7 +4268,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4302,10 +4302,13 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if if (use_buffers) then - iaux(:) = 0 #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnr = maxval(iaux(1:nr)) #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4342,22 +4347,21 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4386,67 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(maxnr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4564,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then From 70f51b9da89cb36897bedbdb80f29fbc439ac21e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 15 May 2024 12:05:15 +0200 Subject: [PATCH 096/110] Improve handling of fix_coo buffers with OpenMP --- base/serial/impl/psb_c_coo_impl.F90 | 81 +++++++++++++++++------------ base/serial/impl/psb_d_coo_impl.F90 | 81 +++++++++++++++++------------ base/serial/impl/psb_s_coo_impl.F90 | 81 +++++++++++++++++------------ base/serial/impl/psb_z_coo_impl.F90 | 81 +++++++++++++++++------------ 4 files changed, 188 insertions(+), 136 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index cc3c6842..1b015ab1 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4174,7 +4174,6 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,7 +4300,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then + !if (use_buffers) then #if defined(OPENMP) !$omp workshare iaux(:) = 0 @@ -4322,7 +4321,14 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = maxval(iaux(1:nr)) + maxnr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnr) + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do + !$OMP END PARALLEL DO #else iaux(:) = 0 !srt_inp = .true. @@ -4338,8 +4344,12 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnr = 0 + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4417,7 +4427,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(maxnr+2)) + allocate(ixt(2*maxnr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4721,7 +4731,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4743,38 +4753,41 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnr+2)) + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j + last_elem = j end if - - last_elem = j - end if - end do + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index e523d83d..e714ef5e 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4174,7 +4174,6 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,7 +4300,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then + !if (use_buffers) then #if defined(OPENMP) !$omp workshare iaux(:) = 0 @@ -4322,7 +4321,14 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = maxval(iaux(1:nr)) + maxnr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnr) + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do + !$OMP END PARALLEL DO #else iaux(:) = 0 !srt_inp = .true. @@ -4338,8 +4344,12 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnr = 0 + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4417,7 +4427,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(maxnr+2)) + allocate(ixt(2*maxnr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4721,7 +4731,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4743,38 +4753,41 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnr+2)) + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j + last_elem = j end if - - last_elem = j - end if - end do + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index b46731fc..029b9dbb 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4174,7 +4174,6 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,7 +4300,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then + !if (use_buffers) then #if defined(OPENMP) !$omp workshare iaux(:) = 0 @@ -4322,7 +4321,14 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = maxval(iaux(1:nr)) + maxnr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnr) + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do + !$OMP END PARALLEL DO #else iaux(:) = 0 !srt_inp = .true. @@ -4338,8 +4344,12 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnr = 0 + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4417,7 +4427,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(maxnr+2)) + allocate(ixt(2*maxnr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4721,7 +4731,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4743,38 +4753,41 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnr+2)) + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j + last_elem = j end if - - last_elem = j - end if - end do + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index a7e527e5..32dd80b9 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4174,7 +4174,6 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,7 +4300,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then + !if (use_buffers) then #if defined(OPENMP) !$omp workshare iaux(:) = 0 @@ -4322,7 +4321,14 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = maxval(iaux(1:nr)) + maxnr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnr) + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do + !$OMP END PARALLEL DO #else iaux(:) = 0 !srt_inp = .true. @@ -4338,8 +4344,12 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnr = 0 + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4417,7 +4427,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(maxnr+2)) + allocate(ixt(2*maxnr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4721,7 +4731,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4743,38 +4753,41 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnr+2)) + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j + last_elem = j end if - - last_elem = j - end if - end do + end do + end block !$OMP END PARALLEL #else i = 1 From c8cc2275d07182637c75309cd5c42b4a8595e0c4 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 15 May 2024 16:39:22 +0200 Subject: [PATCH 097/110] Fix cuda/makefile for make -j --- cuda/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/Makefile b/cuda/Makefile index 33787f4e..7e428629 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -64,7 +64,7 @@ spgpulib: $(MAKE) -C spgpu lib -diagdev_mod.o dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o +hdiagdev_mod.o diagdev_mod.o dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o From 35096a2ef9da6a698af6c7b479f3fb26498da518 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 16 May 2024 10:05:11 +0200 Subject: [PATCH 098/110] Cosmetic changes to coo_impl --- base/serial/impl/psb_c_coo_impl.F90 | 24 ++++++++++++------------ base/serial/impl/psb_d_coo_impl.F90 | 24 ++++++++++++------------ base/serial/impl/psb_s_coo_impl.F90 | 24 ++++++++++++------------ base/serial/impl/psb_z_coo_impl.F90 | 24 ++++++++++++------------ 4 files changed, 48 insertions(+), 48 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 1b015ab1..60155771 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4267,7 +4267,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret, maxnr + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4305,9 +4305,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !$omp workshare iaux(:) = 0 !$omp end workshare - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux,maxnr) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4321,12 +4321,12 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP private(i) shared(nr,iaux)& - !$OMP reduction(max:maxnr) + !$OMP reduction(max:maxnzr) do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do !$OMP END PARALLEL DO #else @@ -4344,9 +4344,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do - maxnr = 0 + maxnzr = 0 do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do #endif !end if @@ -4371,7 +4371,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4427,7 +4427,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4731,7 +4731,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4755,7 +4755,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) ! --------------------------------------------------- first_elem = 0 diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index e714ef5e..d92b0dc5 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4267,7 +4267,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret, maxnr + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4305,9 +4305,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !$omp workshare iaux(:) = 0 !$omp end workshare - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux,maxnr) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4321,12 +4321,12 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP private(i) shared(nr,iaux)& - !$OMP reduction(max:maxnr) + !$OMP reduction(max:maxnzr) do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do !$OMP END PARALLEL DO #else @@ -4344,9 +4344,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do - maxnr = 0 + maxnzr = 0 do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do #endif !end if @@ -4371,7 +4371,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4427,7 +4427,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4731,7 +4731,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4755,7 +4755,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) ! --------------------------------------------------- first_elem = 0 diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 029b9dbb..01d82399 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4267,7 +4267,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret, maxnr + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4305,9 +4305,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !$omp workshare iaux(:) = 0 !$omp end workshare - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux,maxnr) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4321,12 +4321,12 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP private(i) shared(nr,iaux)& - !$OMP reduction(max:maxnr) + !$OMP reduction(max:maxnzr) do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do !$OMP END PARALLEL DO #else @@ -4344,9 +4344,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do - maxnr = 0 + maxnzr = 0 do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do #endif !end if @@ -4371,7 +4371,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4427,7 +4427,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4731,7 +4731,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4755,7 +4755,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) ! --------------------------------------------------- first_elem = 0 diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 32dd80b9..c17ce628 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4267,7 +4267,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret, maxnr + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4305,9 +4305,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !$omp workshare iaux(:) = 0 !$omp end workshare - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux,maxnr) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4321,12 +4321,12 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP private(i) shared(nr,iaux)& - !$OMP reduction(max:maxnr) + !$OMP reduction(max:maxnzr) do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do !$OMP END PARALLEL DO #else @@ -4344,9 +4344,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do - maxnr = 0 + maxnzr = 0 do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do #endif !end if @@ -4371,7 +4371,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4427,7 +4427,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4731,7 +4731,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4755,7 +4755,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) ! --------------------------------------------------- first_elem = 0 From e19284eb6cb36d599ea3a9c73aafbd6f16ecfb98 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 16 May 2024 10:10:08 +0200 Subject: [PATCH 099/110] Small omp addition --- base/serial/impl/psb_c_coo_impl.F90 | 3 ++- base/serial/impl/psb_d_coo_impl.F90 | 3 ++- base/serial/impl/psb_s_coo_impl.F90 | 3 ++- base/serial/impl/psb_z_coo_impl.F90 | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 60155771..d6117546 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_c_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index d92b0dc5..4cb4c3ec 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_d_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 01d82399..f706db33 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_s_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index c17ce628..c368ce91 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_z_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo From 12a4c21fedff07f50328f2ed856e0187522469ab Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 20 May 2024 11:47:32 +0200 Subject: [PATCH 100/110] Fixes for OpenMP compilation in map_mod --- base/modules/desc/psb_gen_block_map_mod.F90 | 4 ++-- base/modules/desc/psb_repl_map_mod.F90 | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/base/modules/desc/psb_gen_block_map_mod.F90 b/base/modules/desc/psb_gen_block_map_mod.F90 index 650bb430..82a4cc15 100644 --- a/base/modules/desc/psb_gen_block_map_mod.F90 +++ b/base/modules/desc/psb_gen_block_map_mod.F90 @@ -282,7 +282,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap,owned_,info) & + !$omp shared(mask,idxin,idxout,idxmap,owned_,info,im) & !$omp private(i) do i=1, im if (mask(i)) then @@ -300,7 +300,7 @@ contains !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap,owned_,info) & + !$omp shared(idxin,idxout,idxmap,owned_,info,im) & !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then diff --git a/base/modules/desc/psb_repl_map_mod.F90 b/base/modules/desc/psb_repl_map_mod.F90 index 738d6de2..f68ae3b8 100644 --- a/base/modules/desc/psb_repl_map_mod.F90 +++ b/base/modules/desc/psb_repl_map_mod.F90 @@ -228,7 +228,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap) & + !$omp shared(mask,idxin,idxout,idxmap,im) & !$omp private(i) do i=1, im if (mask(i)) then @@ -242,7 +242,7 @@ contains !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap) & + !$omp shared(idxin,idxout,idxmap,im) & !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -333,7 +333,7 @@ contains if (idxmap%is_asb()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,idxmap) & + !$omp shared(mask,idx,idxmap,is) & !$omp private(i) do i=1, is if (mask(i)) then @@ -347,7 +347,7 @@ contains !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,idxmap) & + !$omp shared(mask,idx,idxmap,is) & !$omp private(i) do i=1,is if (mask(i)) then @@ -369,7 +369,7 @@ contains if (idxmap%is_asb()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,idxmap) & + !$omp shared(idx,idxmap,is) & !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -381,7 +381,7 @@ contains !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,idxmap) & + !$omp shared(idx,idxmap,is) & !$omp private(i) do i=1,is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -434,7 +434,7 @@ contains if (idxmap%is_asb()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap) & + !$omp shared(mask,idxin,idxout,idxmap,im) & !$omp private(i) do i=1, im if (mask(i)) then @@ -448,7 +448,7 @@ contains !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap) & + !$omp shared(mask,idxin,idxout,idxmap,im) & !$omp private(i) do i=1,im if (mask(i)) then @@ -469,7 +469,7 @@ contains if (idxmap%is_asb()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap) & + !$omp shared(idxin,idxout,idxmap,im) & !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -481,7 +481,7 @@ contains !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap) & + !$omp shared(idxin,idxout,idxmap,im) & !$omp private(i) do i=1,im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then From ee66db5efde31451f7673bf05bbb40fe482c9055 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 21 May 2024 11:25:38 +0200 Subject: [PATCH 101/110] Refactor interface to cusparse in preparation for CSR Adaptive --- cuda/ccusparse.c | 59 +-------------------------- cuda/ccusparse.h | 100 ++++++++++++++++++++++++++++++++++++++++++++++ cuda/dcusparse.c | 59 +-------------------------- cuda/dcusparse.h | 101 ++++++++++++++++++++++++++++++++++++++++++++++ cuda/scusparse.c | 58 +-------------------------- cuda/scusparse.h | 102 +++++++++++++++++++++++++++++++++++++++++++++++ cuda/zcusparse.c | 59 +-------------------------- cuda/zcusparse.h | 101 ++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 408 insertions(+), 231 deletions(-) create mode 100644 cuda/ccusparse.h create mode 100644 cuda/dcusparse.h create mode 100644 cuda/scusparse.h create mode 100644 cuda/zcusparse.h diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index bab6ede0..3a6556ce 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -38,62 +38,5 @@ #include "cintrf.h" #include "fcusparse.h" - -/* Double precision real */ -#define TYPE float complex -#define CUSPARSE_BASE_TYPE CUDA_C_32F -#define T_CSRGDeviceMat c_CSRGDeviceMat -#define T_Cmat c_Cmat -#define T_spmvCSRGDevice c_spmvCSRGDevice -#define T_spsvCSRGDevice c_spsvCSRGDevice -#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc -#define T_CSRGDeviceFree c_CSRGDeviceFree -#define T_CSRGHost2Device c_CSRGHost2Device -#define T_CSRGDevice2Host c_CSRGDevice2Host -#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode -#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType -#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms - -#if CUDA_SHORT_VERSION <= 10 -#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis -#define cusparseTcsrmv cusparseCcsrmv -#define cusparseTcsrsv_solve cusparseCcsrsv_solve -#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis -#define T_HYBGDeviceMat c_HYBGDeviceMat -#define T_Hmat c_Hmat -#define T_HYBGDeviceFree c_HYBGDeviceFree -#define T_spmvHYBGDevice c_spmvHYBGDevice -#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc -#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType -#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase -#define T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType -#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode -#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis -#define T_spsvHYBGDevice c_spsvHYBGDevice -#define T_HYBGHost2Device c_HYBGHost2Device -#define cusparseThybmv cusparseChybmv -#define cusparseThybsv_solve cusparseChybsv_solve -#define cusparseThybsv_analysis cusparseChybsv_analysis -#define cusparseTcsr2hyb cusparseCcsr2hyb - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve -#else - -#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer -#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr -#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr -#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr - -#endif - +#include "ccusparse.h" #include "fcusparse_fct.h" - diff --git a/cuda/ccusparse.h b/cuda/ccusparse.h new file mode 100644 index 00000000..f101c73d --- /dev/null +++ b/cuda/ccusparse.h @@ -0,0 +1,100 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef CCUSPARSE_ +#define CCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE float complex +#define CUSPARSE_BASE_TYPE CUDA_C_32F +#define T_CSRGDeviceMat c_CSRGDeviceMat +#define T_Cmat c_Cmat +#define T_spmvCSRGDevice c_spmvCSRGDevice +#define T_spsvCSRGDevice c_spsvCSRGDevice +#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc +#define T_CSRGDeviceFree c_CSRGDeviceFree +#define T_CSRGHost2Device c_CSRGHost2Device +#define T_CSRGDevice2Host c_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseCcsrmv +#define cusparseTcsrsv_solve cusparseCcsrsv_solve +#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis +#define T_HYBGDeviceMat c_HYBGDeviceMat +#define T_Hmat c_Hmat +#define T_HYBGDeviceFree c_HYBGDeviceFree +#define T_spmvHYBGDevice c_spmvHYBGDevice +#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice c_spsvHYBGDevice +#define T_HYBGHost2Device c_HYBGHost2Device +#define cusparseThybmv cusparseChybmv +#define cusparseThybsv_solve cusparseChybsv_solve +#define cusparseThybsv_analysis cusparseChybsv_analysis +#define cusparseTcsr2hyb cusparseCcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr + +#endif + +#endif diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index 657ca5be..6bb726c9 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -38,62 +38,5 @@ #include "cintrf.h" #include "fcusparse.h" - -/* Double precision real */ -#define TYPE double -#define CUSPARSE_BASE_TYPE CUDA_R_64F -#define T_CSRGDeviceMat d_CSRGDeviceMat -#define T_Cmat d_Cmat -#define T_spmvCSRGDevice d_spmvCSRGDevice -#define T_spsvCSRGDevice d_spsvCSRGDevice -#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc -#define T_CSRGDeviceFree d_CSRGDeviceFree -#define T_CSRGHost2Device d_CSRGHost2Device -#define T_CSRGDevice2Host d_CSRGDevice2Host -#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode -#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType -#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms - -#if CUDA_SHORT_VERSION <= 10 -#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis -#define cusparseTcsrmv cusparseDcsrmv -#define cusparseTcsrsv_solve cusparseDcsrsv_solve -#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis -#define T_HYBGDeviceMat d_HYBGDeviceMat -#define T_Hmat d_Hmat -#define T_HYBGDeviceFree d_HYBGDeviceFree -#define T_spmvHYBGDevice d_spmvHYBGDevice -#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc -#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType -#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase -#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType -#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode -#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis -#define T_spsvHYBGDevice d_spsvHYBGDevice -#define T_HYBGHost2Device d_HYBGHost2Device -#define cusparseThybmv cusparseDhybmv -#define cusparseThybsv_solve cusparseDhybsv_solve -#define cusparseThybsv_analysis cusparseDhybsv_analysis -#define cusparseTcsr2hyb cusparseDcsr2hyb - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve -#else - -#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer -#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr -#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr -#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr - -#endif - +#include "dcusparse.h" #include "fcusparse_fct.h" - diff --git a/cuda/dcusparse.h b/cuda/dcusparse.h new file mode 100644 index 00000000..d7875650 --- /dev/null +++ b/cuda/dcusparse.h @@ -0,0 +1,101 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef DCUSPARSE_ +#define DCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE double +#define CUSPARSE_BASE_TYPE CUDA_R_64F +#define T_CSRGDeviceMat d_CSRGDeviceMat +#define T_Cmat d_Cmat +#define T_spmvCSRGDevice d_spmvCSRGDevice +#define T_spsvCSRGDevice d_spsvCSRGDevice +#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc +#define T_CSRGDeviceFree d_CSRGDeviceFree +#define T_CSRGHost2Device d_CSRGHost2Device +#define T_CSRGDevice2Host d_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseDcsrmv +#define cusparseTcsrsv_solve cusparseDcsrsv_solve +#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis +#define T_HYBGDeviceMat d_HYBGDeviceMat +#define T_Hmat d_Hmat +#define T_HYBGDeviceFree d_HYBGDeviceFree +#define T_spmvHYBGDevice d_spmvHYBGDevice +#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice d_spsvHYBGDevice +#define T_HYBGHost2Device d_HYBGHost2Device +#define cusparseThybmv cusparseDhybmv +#define cusparseThybsv_solve cusparseDhybsv_solve +#define cusparseThybsv_analysis cusparseDhybsv_analysis +#define cusparseTcsr2hyb cusparseDcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr + +#endif + +#endif + diff --git a/cuda/scusparse.c b/cuda/scusparse.c index d4db9b7c..f54a35fd 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -38,62 +38,6 @@ #include "cintrf.h" #include "fcusparse.h" - -/* Double precision real */ -#define TYPE float -#define CUSPARSE_BASE_TYPE CUDA_R_32F -#define T_CSRGDeviceMat s_CSRGDeviceMat -#define T_Cmat s_Cmat -#define T_spmvCSRGDevice s_spmvCSRGDevice -#define T_spsvCSRGDevice s_spsvCSRGDevice -#define T_CSRGDeviceAlloc s_CSRGDeviceAlloc -#define T_CSRGDeviceFree s_CSRGDeviceFree -#define T_CSRGHost2Device s_CSRGHost2Device -#define T_CSRGDevice2Host s_CSRGDevice2Host -#define T_CSRGDeviceSetMatFillMode s_CSRGDeviceSetMatFillMode -#define T_CSRGDeviceSetMatDiagType s_CSRGDeviceSetMatDiagType -#define T_CSRGDeviceGetParms s_CSRGDeviceGetParms - -#if CUDA_SHORT_VERSION <= 10 -#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsmAnalysis s_CSRGDeviceCsrsmAnalysis -#define cusparseTcsrmv cusparseScsrmv -#define cusparseTcsrsv_solve cusparseScsrsv_solve -#define cusparseTcsrsv_analysis cusparseScsrsv_analysis -#define T_HYBGDeviceMat s_HYBGDeviceMat -#define T_Hmat s_Hmat -#define T_HYBGDeviceFree s_HYBGDeviceFree -#define T_spmvHYBGDevice s_spmvHYBGDevice -#define T_HYBGDeviceAlloc s_HYBGDeviceAlloc -#define T_HYBGDeviceSetMatDiagType s_HYBGDeviceSetMatDiagType -#define T_HYBGDeviceSetMatIndexBase s_HYBGDeviceSetMatIndexBase -#define T_HYBGDeviceSetMatType s_HYBGDeviceSetMatType -#define T_HYBGDeviceSetMatFillMode s_HYBGDeviceSetMatFillMode -#define T_HYBGDeviceHybsmAnalysis s_HYBGDeviceHybsmAnalysis -#define T_spsvHYBGDevice s_spsvHYBGDevice -#define T_HYBGHost2Device s_HYBGHost2Device -#define cusparseThybmv cusparseShybmv -#define cusparseThybsv_solve cusparseShybsv_solve -#define cusparseThybsv_analysis cusparseShybsv_analysis -#define cusparseTcsr2hyb cusparseScsr2hyb - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis s_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseScsrsv2_solve -#else - -#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer -#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr -#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr -#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr - -#endif - +#include "scusparse.h" #include "fcusparse_fct.h" diff --git a/cuda/scusparse.h b/cuda/scusparse.h new file mode 100644 index 00000000..724bdc2a --- /dev/null +++ b/cuda/scusparse.h @@ -0,0 +1,102 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef SCUSPARSE_ +#define SCUSPARSE_ + + + +#include +#include + +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE float +#define CUSPARSE_BASE_TYPE CUDA_R_32F +#define T_CSRGDeviceMat s_CSRGDeviceMat +#define T_Cmat s_Cmat +#define T_spmvCSRGDevice s_spmvCSRGDevice +#define T_spsvCSRGDevice s_spsvCSRGDevice +#define T_CSRGDeviceAlloc s_CSRGDeviceAlloc +#define T_CSRGDeviceFree s_CSRGDeviceFree +#define T_CSRGHost2Device s_CSRGHost2Device +#define T_CSRGDevice2Host s_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode s_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType s_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms s_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis s_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseScsrmv +#define cusparseTcsrsv_solve cusparseScsrsv_solve +#define cusparseTcsrsv_analysis cusparseScsrsv_analysis +#define T_HYBGDeviceMat s_HYBGDeviceMat +#define T_Hmat s_Hmat +#define T_HYBGDeviceFree s_HYBGDeviceFree +#define T_spmvHYBGDevice s_spmvHYBGDevice +#define T_HYBGDeviceAlloc s_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType s_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase s_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType s_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode s_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis s_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice s_spsvHYBGDevice +#define T_HYBGHost2Device s_HYBGHost2Device +#define cusparseThybmv cusparseShybmv +#define cusparseThybsv_solve cusparseShybsv_solve +#define cusparseThybsv_analysis cusparseShybsv_analysis +#define cusparseTcsr2hyb cusparseScsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis s_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseScsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr + +#endif + +#endif + diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index a70a6573..2ae0c04e 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -38,62 +38,5 @@ #include "cintrf.h" #include "fcusparse.h" - -/* Double precision real */ -#define TYPE double complex -#define CUSPARSE_BASE_TYPE CUDA_C_64F -#define T_CSRGDeviceMat z_CSRGDeviceMat -#define T_Cmat z_Cmat -#define T_spmvCSRGDevice z_spmvCSRGDevice -#define T_spsvCSRGDevice z_spsvCSRGDevice -#define T_CSRGDeviceAlloc z_CSRGDeviceAlloc -#define T_CSRGDeviceFree z_CSRGDeviceFree -#define T_CSRGHost2Device z_CSRGHost2Device -#define T_CSRGDevice2Host z_CSRGDevice2Host -#define T_CSRGDeviceSetMatFillMode z_CSRGDeviceSetMatFillMode -#define T_CSRGDeviceSetMatDiagType z_CSRGDeviceSetMatDiagType -#define T_CSRGDeviceGetParms z_CSRGDeviceGetParms - -#if CUDA_SHORT_VERSION <= 10 -#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsmAnalysis z_CSRGDeviceCsrsmAnalysis -#define cusparseTcsrmv cusparseZcsrmv -#define cusparseTcsrsv_solve cusparseZcsrsv_solve -#define cusparseTcsrsv_analysis cusparseZcsrsv_analysis -#define T_HYBGDeviceMat z_HYBGDeviceMat -#define T_Hmat z_Hmat -#define T_HYBGDeviceFree z_HYBGDeviceFree -#define T_spmvHYBGDevice z_spmvHYBGDevice -#define T_HYBGDeviceAlloc z_HYBGDeviceAlloc -#define T_HYBGDeviceSetMatDiagType z_HYBGDeviceSetMatDiagType -#define T_HYBGDeviceSetMatIndexBase z_HYBGDeviceSetMatIndexBase -#define T_HYBGDeviceSetMatType z_HYBGDeviceSetMatType -#define T_HYBGDeviceSetMatFillMode z_HYBGDeviceSetMatFillMode -#define T_HYBGDeviceHybsmAnalysis z_HYBGDeviceHybsmAnalysis -#define T_spsvHYBGDevice z_spsvHYBGDevice -#define T_HYBGHost2Device z_HYBGHost2Device -#define cusparseThybmv cusparseZhybmv -#define cusparseThybsv_solve cusparseZhybsv_solve -#define cusparseThybsv_analysis cusparseZhybsv_analysis -#define cusparseTcsr2hyb cusparseZcsr2hyb - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis z_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve -#else - -#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer -#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr -#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr -#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr - -#endif - +#include "zcusparse.h" #include "fcusparse_fct.h" - diff --git a/cuda/zcusparse.h b/cuda/zcusparse.h new file mode 100644 index 00000000..364e8ecc --- /dev/null +++ b/cuda/zcusparse.h @@ -0,0 +1,101 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef ZCUSPARSE_ +#define ZCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE double complex +#define CUSPARSE_BASE_TYPE CUDA_C_64F +#define T_CSRGDeviceMat z_CSRGDeviceMat +#define T_Cmat z_Cmat +#define T_spmvCSRGDevice z_spmvCSRGDevice +#define T_spsvCSRGDevice z_spsvCSRGDevice +#define T_CSRGDeviceAlloc z_CSRGDeviceAlloc +#define T_CSRGDeviceFree z_CSRGDeviceFree +#define T_CSRGHost2Device z_CSRGHost2Device +#define T_CSRGDevice2Host z_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode z_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType z_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms z_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis z_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseZcsrmv +#define cusparseTcsrsv_solve cusparseZcsrsv_solve +#define cusparseTcsrsv_analysis cusparseZcsrsv_analysis +#define T_HYBGDeviceMat z_HYBGDeviceMat +#define T_Hmat z_Hmat +#define T_HYBGDeviceFree z_HYBGDeviceFree +#define T_spmvHYBGDevice z_spmvHYBGDevice +#define T_HYBGDeviceAlloc z_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType z_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase z_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType z_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode z_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis z_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice z_spsvHYBGDevice +#define T_HYBGHost2Device z_HYBGHost2Device +#define cusparseThybmv cusparseZhybmv +#define cusparseThybsv_solve cusparseZhybsv_solve +#define cusparseThybsv_analysis cusparseZhybsv_analysis +#define cusparseTcsr2hyb cusparseZcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis z_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr + +#endif + +#endif + From 2e3f862e4292277e3c6ec43b332251fab4096496 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 21 May 2024 12:31:02 +0200 Subject: [PATCH 102/110] Start refactoring cusparse.h --- cuda/ccusparse.h | 4 ++-- cuda/dcusparse.h | 4 ++-- cuda/fcusparse.h | 3 +++ cuda/fcusparse_fct.h | 4 ++-- cuda/scusparse.h | 6 ++---- cuda/zcusparse.h | 5 ++--- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cuda/ccusparse.h b/cuda/ccusparse.h index f101c73d..a5380a0a 100644 --- a/cuda/ccusparse.h +++ b/cuda/ccusparse.h @@ -38,8 +38,6 @@ #include #include #include "cintrf.h" -#include "fcusparse.h" - /* Double precision real */ #define TYPE float complex @@ -97,4 +95,6 @@ #endif +#include "fcusparse.h" + #endif diff --git a/cuda/dcusparse.h b/cuda/dcusparse.h index d7875650..34849b1e 100644 --- a/cuda/dcusparse.h +++ b/cuda/dcusparse.h @@ -38,8 +38,6 @@ #include #include #include "cintrf.h" -#include "fcusparse.h" - /* Double precision real */ #define TYPE double @@ -97,5 +95,7 @@ #endif +#include "fcusparse.h" + #endif diff --git a/cuda/fcusparse.h b/cuda/fcusparse.h index 7d2972f8..6ef22db2 100644 --- a/cuda/fcusparse.h +++ b/cuda/fcusparse.h @@ -65,4 +65,7 @@ cusparseHandle_t *getHandle(); } \ } + #endif + + diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 28d900da..ecba8484 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -27,7 +27,7 @@ /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ - + typedef struct T_CSRGDeviceMat { #if CUDA_SHORT_VERSION <= 10 @@ -129,7 +129,7 @@ int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, int *irp, int *ja, TYPE *val); #endif - + int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY) { diff --git a/cuda/scusparse.h b/cuda/scusparse.h index 724bdc2a..479f5069 100644 --- a/cuda/scusparse.h +++ b/cuda/scusparse.h @@ -31,7 +31,6 @@ #ifndef SCUSPARSE_ #define SCUSPARSE_ - #include #include @@ -39,8 +38,6 @@ #include #include #include "cintrf.h" -#include "fcusparse.h" - /* Double precision real */ #define TYPE float @@ -98,5 +95,6 @@ #endif -#endif +#include "fcusparse.h" +#endif diff --git a/cuda/zcusparse.h b/cuda/zcusparse.h index 364e8ecc..b63aa103 100644 --- a/cuda/zcusparse.h +++ b/cuda/zcusparse.h @@ -38,8 +38,6 @@ #include #include #include "cintrf.h" -#include "fcusparse.h" - /* Double precision real */ #define TYPE double complex @@ -97,5 +95,6 @@ #endif -#endif +#include "fcusparse.h" +#endif From d71d355b6858fa27c5cbfcdba3a32ab9d25f16c0 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 21 May 2024 12:59:44 +0200 Subject: [PATCH 103/110] Refactor cusparse includes.. --- cuda/ccusparse.c | 1 + cuda/dcusparse.c | 1 + cuda/fcusparse_dat.h | 136 +++++++++++++++++++++++++++++++++++++++++++ cuda/fcusparse_fct.h | 102 -------------------------------- cuda/scusparse.c | 2 +- cuda/zcusparse.c | 1 + 6 files changed, 140 insertions(+), 103 deletions(-) create mode 100644 cuda/fcusparse_dat.h diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index 3a6556ce..a5fee796 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -39,4 +39,5 @@ #include "fcusparse.h" #include "ccusparse.h" +#include "fcusparse_dat.h" #include "fcusparse_fct.h" diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index 6bb726c9..41f93603 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -39,4 +39,5 @@ #include "fcusparse.h" #include "dcusparse.h" +#include "fcusparse_dat.h" #include "fcusparse_fct.h" diff --git a/cuda/fcusparse_dat.h b/cuda/fcusparse_dat.h new file mode 100644 index 00000000..4b7cd6ce --- /dev/null +++ b/cuda/fcusparse_dat.h @@ -0,0 +1,136 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ +#ifndef FCUSPARSE_DAT_ +#define FCUSPARSE_DAT_ + + +typedef struct T_CSRGDeviceMat +{ +#if CUDA_SHORT_VERSION <= 10 + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; +#elif CUDA_VERSION < 11030 + cusparseMatDescr_t descr; + csrsv2Info_t triang; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#else + cusparseSpMatDescr_t *spmvDescr; + cusparseSpSVDescr_t *spsvDescr; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#endif + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_CSRGDeviceMat; + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Cmat +{ + T_CSRGDeviceMat *mat; +} T_Cmat; + +#if CUDA_SHORT_VERSION <= 10 +typedef struct T_HYBGDeviceMat +{ + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; + cusparseHybMat_t hybA; + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_HYBGDeviceMat; + + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Hmat +{ + T_HYBGDeviceMat *mat; +} T_Hmat; +#endif + +int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz); +int T_CSRGDeviceFree(T_Cmat *Mat); + + +int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); + +int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz); + +#if CUDA_SHORT_VERSION <= 10 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat); +#elif CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +#else + +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat); +#endif + + + +#if CUDA_SHORT_VERSION <= 10 + + +int T_HYBGDeviceFree(T_Hmat *Matrix); +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz); +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type); +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix); +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +#endif + +#endif diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index ecba8484..12be21bd 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -28,108 +28,6 @@ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ -typedef struct T_CSRGDeviceMat -{ -#if CUDA_SHORT_VERSION <= 10 - cusparseMatDescr_t descr; - cusparseSolveAnalysisInfo_t triang; -#elif CUDA_VERSION < 11030 - cusparseMatDescr_t descr; - csrsv2Info_t triang; - size_t mvbsize, svbsize; - void *mvbuffer, *svbuffer; -#else - cusparseSpMatDescr_t *spmvDescr; - cusparseSpSVDescr_t *spsvDescr; - size_t mvbsize, svbsize; - void *mvbuffer, *svbuffer; -#endif - int m, n, nz; - TYPE *val; - int *irp; - int *ja; -} T_CSRGDeviceMat; - -/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ -typedef struct T_Cmat -{ - T_CSRGDeviceMat *mat; -} T_Cmat; - -#if CUDA_SHORT_VERSION <= 10 -typedef struct T_HYBGDeviceMat -{ - cusparseMatDescr_t descr; - cusparseSolveAnalysisInfo_t triang; - cusparseHybMat_t hybA; - int m, n, nz; - TYPE *val; - int *irp; - int *ja; -} T_HYBGDeviceMat; - - -/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ -typedef struct T_Hmat -{ - T_HYBGDeviceMat *mat; -} T_Hmat; -#endif - -int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, - TYPE beta, void *deviceY); -int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, - TYPE beta, void *deviceY); -int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz); -int T_CSRGDeviceFree(T_Cmat *Mat); - - -int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz, - int *irp, int *ja, TYPE *val); -int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz, - int *irp, int *ja, TYPE *val); - -int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz); - -#if CUDA_SHORT_VERSION <= 10 -int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); -int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat); -#elif CUDA_VERSION < 11030 -int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); -#else - -int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat); -int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat); -int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat); -int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat); -#endif - - - -#if CUDA_SHORT_VERSION <= 10 - - -int T_HYBGDeviceFree(T_Hmat *Matrix); -int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, - TYPE beta, void *deviceY); -int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz); -int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type); -int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type); -int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type); -int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type); -int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix); -int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, - TYPE beta, void *deviceY); -int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, - int *irp, int *ja, TYPE *val); -#endif - int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY) { diff --git a/cuda/scusparse.c b/cuda/scusparse.c index f54a35fd..da21506c 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -39,5 +39,5 @@ #include "fcusparse.h" #include "scusparse.h" +#include "fcusparse_dat.h" #include "fcusparse_fct.h" - diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index 2ae0c04e..62b94b3b 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -39,4 +39,5 @@ #include "fcusparse.h" #include "zcusparse.h" +#include "fcusparse_dat.h" #include "fcusparse_fct.h" From a177e94ba5f7fa97dab023357a17e90704dc50ff Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 21 May 2024 17:49:40 +0200 Subject: [PATCH 104/110] Fix comments, --- base/modules/desc/psb_desc_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90 index 226d9d2b..ad0bc74c 100644 --- a/base/modules/desc/psb_desc_mod.F90 +++ b/base/modules/desc/psb_desc_mod.F90 @@ -409,7 +409,7 @@ contains ! ! Since the hashed lists take up (somewhat) more than 2*N_COL integers, ! it makes no sense to use them if you don't have at least - ! 3 processes, no matter what the size of the process. + ! 3 processes, no matter what the size of the index space. ! val = psb_cd_is_large_size(m) .and. (np > 2) end function psb_cd_choose_large_state From 42293c62b6c8b731f64f38c0b9b989dd474cfbc7 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 21 May 2024 17:49:48 +0200 Subject: [PATCH 105/110] Fix usage of sync() --- base/serial/impl/psb_c_base_mat_impl.F90 | 3 +++ base/serial/impl/psb_d_base_mat_impl.F90 | 3 +++ base/serial/impl/psb_s_base_mat_impl.F90 | 3 +++ base/serial/impl/psb_z_base_mat_impl.F90 | 3 +++ 4 files changed, 12 insertions(+) diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index f4bc43cc..fc170a6a 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -2060,6 +2060,8 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2080,6 +2082,7 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 1a8dc084..01a942d1 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -2060,6 +2060,8 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2080,6 +2082,7 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90 index 0b47d472..205d0358 100644 --- a/base/serial/impl/psb_s_base_mat_impl.F90 +++ b/base/serial/impl/psb_s_base_mat_impl.F90 @@ -2060,6 +2060,8 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2080,6 +2082,7 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90 index 2d97f698..915993f0 100644 --- a/base/serial/impl/psb_z_base_mat_impl.F90 +++ b/base/serial/impl/psb_z_base_mat_impl.F90 @@ -2060,6 +2060,8 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2080,6 +2082,7 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) From b9ad357648e3b719ea6b35b2ff1ffa364e6a9eba Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 May 2024 16:25:49 +0200 Subject: [PATCH 106/110] Improve temp memory allocation in fix_coo --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index d6117546..1a1cb3ab 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 4cb4c3ec..0f7da5c0 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index f706db33..c6340a91 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index c368ce91..87e2c8d8 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) From a38867be25d96751bb9e9dbdb4ce11cca6fe517c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 May 2024 16:51:58 +0200 Subject: [PATCH 107/110] Fix allocation in coo_impl --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 1a1cb3ab..b97f493c 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 0f7da5c0..c8a22914 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index c6340a91..4b05cb17 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 87e2c8d8..4b15ef84 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) From 39cfcd3893f9770308b14ad253c92614fe24fade Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 May 2024 16:54:10 +0200 Subject: [PATCH 108/110] Fix allocation in coo_impl --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index b97f493c..ccf7e42d 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index c8a22914..beb438aa 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 4b05cb17..d833cf96 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 4b15ef84..ea6b2492 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) From e3a55967a560802d8543f33057b9bc78d6a41371 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Sun, 23 Jun 2024 16:03:10 +0200 Subject: [PATCH 109/110] Modify CUDA code to compile with 12.4/12.5 --- cuda/CUDA/psi_cuda_CopyCooToElg.cuh | 19 +++++++++---------- cuda/CUDA/psi_cuda_CopyCooToHlg.cuh | 2 -- cuda/CUDA/psi_cuda_c_CopyCooToElg.cu | 4 +--- cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu | 3 --- cuda/CUDA/psi_cuda_d_CopyCooToElg.cu | 3 --- cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu | 3 --- cuda/CUDA/psi_cuda_s_CopyCooToElg.cu | 3 --- cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu | 3 --- cuda/CUDA/psi_cuda_z_CopyCooToElg.cu | 3 --- cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu | 3 --- cuda/ccusparse.c | 1 - cuda/cintrf.h | 7 +++---- cuda/dcusparse.c | 1 - cuda/diagdev.h | 1 + cuda/dnsdev.h | 1 + cuda/elldev.h | 1 + cuda/fcusparse.c | 1 - cuda/fcusparse.h | 1 + cuda/hdiagdev.h | 1 + cuda/hlldev.h | 2 +- cuda/scusparse.c | 1 - cuda/vectordev.h | 4 ++-- cuda/zcusparse.c | 1 - 23 files changed, 21 insertions(+), 48 deletions(-) diff --git a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh index 98aac050..5b723be1 100644 --- a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh +++ b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh @@ -1,8 +1,6 @@ #include #include -#include "cintrf.h" -#include "vectordev.h" #include "psi_cuda_common.cuh" @@ -62,11 +60,9 @@ __global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, ir += ldv; } idiag[i]=idval; - } - void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, int i, int nr, int nza, int baseIdx, int hacksz, int ldv, int nzm, int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, @@ -76,8 +72,10 @@ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, i dim3 grid ((nrws + THREAD_BLOCK - 1) / THREAD_BLOCK); CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn) - <<< grid, block, 0, handle->currentStream >>>(i,nrws, nr, nza, baseIdx, hacksz, ldv, nzm, - rS,devIdisp,devJa,devVal,idiag, rP,cM); + <<< grid, block, 0, handle->currentStream >>>(i,nrws, nr, nza, baseIdx, + hacksz, ldv, nzm, + rS,devIdisp,devJa,devVal, + idiag, rP,cM); } @@ -89,16 +87,17 @@ GEN_PSI_FUNC_NAME(TYPE_SYMBOL) (spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm, int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, int *idiag, int *rP, VALUE_TYPE *cM) -{ int i,j, nrws; +{ int i, nrws; //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); - //fprintf(stderr,"Loop on j: %d\n",j); for (i=0; i #include -#include "cintrf.h" -#include "vectordev.h" #include "psi_cuda_common.cuh" diff --git a/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu index e069ff1e..651a8d9f 100644 --- a/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu +++ b/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu @@ -2,9 +2,7 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE cuFloatComplex #define TYPE_SYMBOL c #include "psi_cuda_CopyCooToElg.cuh" + diff --git a/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu index f2b5c86d..e36728b1 100644 --- a/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu +++ b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE cuFloatComplex #define TYPE_SYMBOL c #include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu index f306ffe1..233bae06 100644 --- a/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu +++ b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE double #define TYPE_SYMBOL d #include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu index 9c0e371e..94e076ae 100644 --- a/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu +++ b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE double #define TYPE_SYMBOL d #include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu index 76e10de1..e083708c 100644 --- a/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu +++ b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE float #define TYPE_SYMBOL s #include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu index c2d76c0a..90ad5fdf 100644 --- a/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu +++ b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE float #define TYPE_SYMBOL s #include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu index a57ad637..b5ec817d 100644 --- a/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu +++ b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE cuDoubleComplex #define TYPE_SYMBOL z #include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu index 2ff9b869..24d39ec4 100644 --- a/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu +++ b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE cuDoubleComplex #define TYPE_SYMBOL z #include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index a5fee796..6b5c8ea6 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -35,7 +35,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" #include "ccusparse.h" diff --git a/cuda/cintrf.h b/cuda/cintrf.h index 3a1f6476..7119378a 100644 --- a/cuda/cintrf.h +++ b/cuda/cintrf.h @@ -37,10 +37,9 @@ #include #include "core.h" -#include "cuda_util.h" -#include "vector.h" -#include "vectordev.h" - +//#include "cuda_util.h" +//#include "vector.h" +//#include "vectordev.h" #define ELL_PITCH_ALIGN_S 32 #define ELL_PITCH_ALIGN_D 16 diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index 41f93603..9af4ce38 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -35,7 +35,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" #include "dcusparse.h" diff --git a/cuda/diagdev.h b/cuda/diagdev.h index 2efbea92..3a062fb7 100644 --- a/cuda/diagdev.h +++ b/cuda/diagdev.h @@ -33,6 +33,7 @@ #define _DIAGDEV_H_ #include "cintrf.h" +#include "vectordev.h" #include "dia.h" struct DiagDevice diff --git a/cuda/dnsdev.h b/cuda/dnsdev.h index 1c335bf9..aa536105 100644 --- a/cuda/dnsdev.h +++ b/cuda/dnsdev.h @@ -34,6 +34,7 @@ #define _DNSDEV_H_ #include "cintrf.h" +#include "vectordev.h" #include "cuComplex.h" #include "cublas_v2.h" diff --git a/cuda/elldev.h b/cuda/elldev.h index 5305057a..4e69bb3a 100644 --- a/cuda/elldev.h +++ b/cuda/elldev.h @@ -34,6 +34,7 @@ #define _ELLDEV_H_ #include "cintrf.h" +#include "vectordev.h" #include "cuComplex.h" #include "ell.h" diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c index e8e46c63..094348ce 100644 --- a/cuda/fcusparse.c +++ b/cuda/fcusparse.c @@ -34,7 +34,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" static cusparseHandle_t *cusparse_handle=NULL; diff --git a/cuda/fcusparse.h b/cuda/fcusparse.h index 6ef22db2..02a45fa3 100644 --- a/cuda/fcusparse.h +++ b/cuda/fcusparse.h @@ -40,6 +40,7 @@ #include #endif #include "cintrf.h" +#include "vectordev.h" int FcusparseCreate(); int FcusparseDestroy(); diff --git a/cuda/hdiagdev.h b/cuda/hdiagdev.h index 5cd9f803..c02fcc69 100644 --- a/cuda/hdiagdev.h +++ b/cuda/hdiagdev.h @@ -33,6 +33,7 @@ #define _HDIAGDEV_H_ #include "cintrf.h" +#include "vectordev.h" #include "hdia.h" struct HdiagDevice diff --git a/cuda/hlldev.h b/cuda/hlldev.h index e4f8259e..3b47f5ea 100644 --- a/cuda/hlldev.h +++ b/cuda/hlldev.h @@ -34,7 +34,7 @@ #include "cintrf.h" #include "hell.h" - +#include "vectordev.h" typedef struct hlldevice { diff --git a/cuda/scusparse.c b/cuda/scusparse.c index da21506c..b0229907 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -35,7 +35,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" #include "scusparse.h" diff --git a/cuda/vectordev.h b/cuda/vectordev.h index 8eca7063..df5fbd82 100644 --- a/cuda/vectordev.h +++ b/cuda/vectordev.h @@ -28,13 +28,12 @@ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ - #pragma once //#include "utils.h" #include "cuda_runtime.h" //#include "common.h" -#include "cintrf.h" +//#include "cintrf.h" #include struct MultiVectDevice @@ -85,3 +84,4 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams * int getMultiVecDeviceSize(void* deviceVec); int getMultiVecDeviceCount(void* deviceVec); int getMultiVecDevicePitch(void* deviceVec); + diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index 62b94b3b..93142d22 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -35,7 +35,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" #include "zcusparse.h" From 9f2b8a2623a5b7c5e3cbfdac29d45cdf8fd3e7c7 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 24 Jun 2024 08:18:07 +0200 Subject: [PATCH 110/110] Cleanup --- cuda/cintrf.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cuda/cintrf.h b/cuda/cintrf.h index 7119378a..58778e6c 100644 --- a/cuda/cintrf.h +++ b/cuda/cintrf.h @@ -37,9 +37,7 @@ #include #include "core.h" -//#include "cuda_util.h" -//#include "vector.h" -//#include "vectordev.h" + #define ELL_PITCH_ALIGN_S 32 #define ELL_PITCH_ALIGN_D 16